diff options
author | fche <fche> | 2007-08-08 03:36:25 +0000 |
---|---|---|
committer | fche <fche> | 2007-08-08 03:36:25 +0000 |
commit | 3f99432cc11977a4345c881bed3aa60a1a614238 (patch) | |
tree | 70dc99d601a65cec051658402531ba39fa2b2dad | |
parent | 98be953834c60aaa2ae7504890f1cf11815e558a (diff) | |
download | systemtap-steved-3f99432cc11977a4345c881bed3aa60a1a614238.tar.gz systemtap-steved-3f99432cc11977a4345c881bed3aa60a1a614238.tar.xz systemtap-steved-3f99432cc11977a4345c881bed3aa60a1a614238.zip |
2007-08-07 Frank Ch. Eigler <fche@redhat.com>
PR 4846
* parse.cxx (input_put): New function, sort of like stdio ungetc.
(input_get): Skip cursor position changing for input_put strings.
(scan): Rework $.../@... substitution into character pasting.
* parse.h: Corresponding changes.
* util.h (lex_cast_qstring): Octal-quote unprintable characters.
* stap.1.in, NEWS: Document new behaviour.
2007-08-07 Frank Ch. Eigler <fche@redhat.com>
PR 4846
* parseko/preprocess13.stp, parseok/nineteen.stp,
semok/twentyfive.stp: New tests.
-rw-r--r-- | ChangeLog | 10 | ||||
-rw-r--r-- | NEWS | 16 | ||||
-rw-r--r-- | parse.cxx | 192 | ||||
-rw-r--r-- | parse.h | 3 | ||||
-rw-r--r-- | stap.1.in | 14 | ||||
-rw-r--r-- | testsuite/ChangeLog | 6 | ||||
-rwxr-xr-x | testsuite/parseko/preprocess13.stp | 4 | ||||
-rwxr-xr-x | testsuite/parseok/nineteen.stp | 3 | ||||
-rwxr-xr-x | testsuite/semok/twentyfive.stp | 8 | ||||
-rw-r--r-- | util.h | 21 |
10 files changed, 185 insertions, 92 deletions
@@ -1,3 +1,13 @@ +2007-08-07 Frank Ch. Eigler <fche@redhat.com> + + PR 4846 + * parse.cxx (input_put): New function, sort of like stdio ungetc. + (input_get): Skip cursor position changing for input_put strings. + (scan): Rework $.../@... substitution into character pasting. + * parse.h: Corresponding changes. + * util.h (lex_cast_qstring): Octal-quote unprintable characters. + * stap.1.in, NEWS: Document new behaviour. + 2007-07-26 David Smith <dsmith@redhat.com> PR 4295 @@ -1,5 +1,21 @@ * What's new since version 0.5.14? +- The way in which command line arguments for scripts are substituted has + changed. Previously, $1 etc. would interpret the corresponding command + line argument as an numeric literal, and @1 as a string literal. Now, + the command line arguments are pasted uninterpreted wherever $1 etc. + appears at the beginning of a token. @1 is similar, but is quoted as + a string. This change does not modify old scripts, but has the effect + of permitting substitution of arbitrary token sequences. + + # This worked before, and still does: + % stap -e 'probe timer.s($1) {}' 5 + # Now this also works: + % stap -e 'probe syscall.$1 {log(@1)}' open + # This won't crash, just signal a recursion error: + % stap -e '$1' '$1' + # As before, $1... is recognized only at the beginning of a token + % stap -e 'probe begin {foo$1=5}' * What's new since version 0.5.13? @@ -12,6 +12,8 @@ #include "staptree.h" #include "parse.h" #include "session.h" +#include "util.h" + #include <iostream> #include <fstream> #include <cctype> @@ -21,6 +23,7 @@ #include <climits> #include <sstream> #include <cstring> +#include <cctype> using namespace std; @@ -488,7 +491,8 @@ parser::peek_kw (std::string const & kw) lexer::lexer (istream& i, const string& in, systemtap_session& s): - input (i), input_name (in), cursor_line (1), cursor_column (1), session(s) + input (i), input_name (in), cursor_suspend_count(0), + cursor_line (1), cursor_column (1), session(s) { } @@ -512,30 +516,61 @@ lexer::input_get () if (c < 0) return c; // EOF - // update source cursor - if (c == '\n') + if (cursor_suspend_count) + // Track effect of input_put: preserve previous cursor/line_column + // until all of its characters are consumed. + cursor_suspend_count --; + else { - cursor_line ++; - cursor_column = 1; + // update source cursor + if (c == '\n') + { + cursor_line ++; + cursor_column = 1; + } + else + cursor_column ++; } - else - cursor_column ++; return c; } +void +lexer::input_put (const string& chars) +{ + // clog << "[put:" << chars << "]"; + for (int i=chars.size()-1; i>=0; i--) + { + int c = chars[i]; + lookahead.insert (lookahead.begin(), c); + cursor_suspend_count ++; + } +} + + token* lexer::scan (bool expand_args) { token* n = new token; n->location.file = input_name; + unsigned semiskipped_p = 0; + skip: n->location.line = cursor_line; n->location.column = cursor_column; + semiskip: + if (semiskipped_p > 1) + { + input_get (); + throw parse_error ("invalid nested substitution of command line arguments"); + } + int c = input_get(); + int c2 = input_peek (); + // clog << "{" << (char)c << (char)c2 << "}"; if (c < 0) { delete n; @@ -545,81 +580,77 @@ lexer::scan (bool expand_args) if (isspace (c)) goto skip; + // Paste command line arguments as character streams into + // the beginning of a token. $1..$999 go through as raw + // characters; @1..@999 are quoted/escaped as strings. + // $# and @# expand to the number of arguments, similarly + // raw or quoted. + if (expand_args && + (c == '$' || c == '@') && + (c2 == '#')) + { + input_get(); // swallow '#' + stringstream converter; + converter << session.args.size (); + if (c == '$') input_put (converter.str()); + else input_put (lex_cast_qstring (converter.str())); + semiskipped_p ++; + goto semiskip; + } + else if (expand_args && + (c == '$' || c == '@') && + (isdigit (c2))) + { + unsigned idx = 0; + do + { + input_get (); + idx = (idx * 10) + (c2 - '0'); + c2 = input_peek (); + } while (c2 > 0 && + isdigit (c2) && + idx <= session.args.size()); // prevent overflow + if (idx == 0 || + idx-1 >= session.args.size()) + throw parse_error ("command line argument index invalid or out of range", n); + + string arg = session.args[idx-1]; + if (c == '$') input_put (arg); + else input_put (lex_cast_qstring (arg)); + semiskipped_p ++; + goto semiskip; + } + else if (isalpha (c) || c == '$' || c == '@' || c == '_') { n->type = tok_identifier; n->content = (char) c; - while (1) + while (isalnum (c2) || c2 == '_' || c2 == '$') { - int c2 = input_peek (); - if (! input) - break; - if ((isalnum(c2) || c2 == '_' || c2 == '$' || c2 == '#' )) - { - n->content.push_back(c2); - input_get (); - } - else - break; - } - - // Expand command line arguments to literals. $1 .. $999 as - // numbers and @1 .. @999 as strings. - if (n->content[0] == '@' || n->content[0] == '$') - { - if (!expand_args) - return n; - if (n->content[1] == '#') - { - stringstream converter; - converter << session.args.size (); - n->type = (n->content[0] == '@') ? tok_string : tok_number; - n->content = converter.str(); - } - else - { - string idxstr = n->content.substr(1); - const char* startp = idxstr.c_str(); - char *endp; - errno = 0; - unsigned long idx = strtoul (startp, &endp, 10); - if (endp == startp) - ; // no numbers at all - leave alone as identifier - else - { - // Use @1/$1 as the base, not @0/$0. Thus the idx-1. - if (errno == ERANGE || errno == EINVAL || *endp != '\0' || - idx == 0 || idx-1 >= session.args.size ()) - throw parse_error ("command line argument index invalid or out of range", n); - - string arg = session.args[idx-1]; - n->type = (n->content[0] == '@') ? tok_string : tok_number; - n->content = arg; - } - } - } - else - { - if (n->content == "probe" - || n->content == "global" - || n->content == "function" - || n->content == "if" - || n->content == "else" - || n->content == "for" - || n->content == "foreach" - || n->content == "in" - || n->content == "limit" - || n->content == "return" - || n->content == "delete" - || n->content == "while" - || n->content == "break" - || n->content == "continue" - || n->content == "next" - || n->content == "string" - || n->content == "long") - n->type = tok_keyword; + input_get (); + n->content.push_back (c2); + c2 = input_peek (); } + if (n->content == "probe" + || n->content == "global" + || n->content == "function" + || n->content == "if" + || n->content == "else" + || n->content == "for" + || n->content == "foreach" + || n->content == "in" + || n->content == "limit" + || n->content == "return" + || n->content == "delete" + || n->content == "while" + || n->content == "break" + || n->content == "continue" + || n->content == "next" + || n->content == "string" + || n->content == "long") + n->type = tok_keyword; + return n; } @@ -631,7 +662,7 @@ lexer::scan (bool expand_args) while (1) { int c2 = input_peek (); - if (! input) + if (c2 < 0) break; // NB: isalnum is very permissive. We rely on strtol, called in @@ -656,14 +687,14 @@ lexer::scan (bool expand_args) { c = input_get (); - if (! input || c == '\n') + if (c < 0 || c == '\n') { n->type = tok_junk; break; } if (c == '\"') // closing double-quotes break; - else if (c == '\\') + else if (c == '\\') // see also input_put { c = input_get (); switch (c) @@ -677,14 +708,13 @@ lexer::scan (bool expand_args) case 'r': case '0' ... '7': // NB: need only match the first digit case '\\': - // Pass these escapes through to the string value - // beign parsed; it will be emitted into a C literal. + // being parsed; it will be emitted into a C literal. n->content.push_back('\\'); + // fall through default: - n->content.push_back(c); break; } @@ -74,10 +74,13 @@ public: private: int input_get (); + void input_put (int); + void input_put (const std::string&); int input_peek (unsigned n=0); std::istream& input; std::string input_name; std::vector<int> lookahead; + unsigned cursor_suspend_count; unsigned cursor_line; unsigned cursor_column; systemtap_session& session; @@ -168,7 +168,7 @@ precedence. Whitespace is ignored. Three forms of comments are supported: .RS .br -.BR # " ... shell style, to the end of line" +.BR # " ... shell style, to the end of line, except for $# and @#" .br .BR // " ... C++ style, to the end of line" .br @@ -182,17 +182,17 @@ Integers are 64-bit signed quantities, although the parser also accepts (and wraps around) values above positive 2**63. .PP In addition, script arguments given at the end of the command line may -be expanded as literals. Use +be inserted. Use .B $1 ... $<NN> -for casting as a numeric literal and +for insertion unquoted, .B @1 ... @<NN> -for casting as string literal. The number of arguments may be accessed +for insertion as a string literal. The number of arguments may be accessed through .B $# -(as a numeric literal) or through +(as an unquoted number) or through .B @# -(as a string literal). These may be used in all contexts where literals -are accepted, including preprocessing stage. Reference to an argument +(as a quoted number). These may be used at any place a token may begin, +including within the preprocessing stage. Reference to an argument number beyond what was actually given is an error. .SS PREPROCESSING diff --git a/testsuite/ChangeLog b/testsuite/ChangeLog index 8a95ff1e..ee107c22 100644 --- a/testsuite/ChangeLog +++ b/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2007-08-07 Frank Ch. Eigler <fche@redhat.com> + + PR 4846 + * parseko/preprocess13.stp, parseok/nineteen.stp, + semok/twentyfive.stp: New tests. + 2007-08-07 David Smith <dsmith@redhat.com> PR 4736 (partial fix) diff --git a/testsuite/parseko/preprocess13.stp b/testsuite/parseko/preprocess13.stp new file mode 100755 index 00000000..9ef34f59 --- /dev/null +++ b/testsuite/parseko/preprocess13.stp @@ -0,0 +1,4 @@ +#! /bin/sh + +# recursive +stap -p1 -e '$1' '$1' diff --git a/testsuite/parseok/nineteen.stp b/testsuite/parseok/nineteen.stp new file mode 100755 index 00000000..1d36b590 --- /dev/null +++ b/testsuite/parseok/nineteen.stp @@ -0,0 +1,3 @@ +#! /bin/sh + +stap -p1 -e '$1 {log (@1.@2)}' 'probe foo' '' # <- control codes diff --git a/testsuite/semok/twentyfive.stp b/testsuite/semok/twentyfive.stp new file mode 100755 index 00000000..fadca484 --- /dev/null +++ b/testsuite/semok/twentyfive.stp @@ -0,0 +1,8 @@ +#! /bin/sh + +set -e +stap -p2 -e 'probe begin {foo$1$2$3}' # $-expansion only at token head +stap -p2 -e 'probe $1 $2' 'syscall.open,begin' '{log ("hello\n")}' +stap -p2 -e 'probe begin {@1 @2}' 'syscall.open,begin' '{log ("hello\n")}' +stap -p2 -e 'probe begin {log (@1.@2)}' 'syscall.open,begin' '{log ("hello\n")}' + @@ -3,7 +3,7 @@ #include <iostream> #include <sstream> #include <stdexcept> - +#include <cctype> const char *get_home_directory(void); int copy_file(const char *src, const char *dest); @@ -65,9 +65,22 @@ lex_cast_qstring(IN const & in) out2 += '"'; for (unsigned i=0; i<out.length(); i++) { - if (out[i] == '"' || out[i] == '\\') // XXX others? - out2 += '\\'; - out2 += out[i]; + char c = out[i]; + if (! isprint(c)) + { + out2 += '\\'; + // quick & dirty octal converter + out2 += "01234567" [(c >> 6) & 0x07]; + out2 += "01234567" [(c >> 3) & 0x07]; + out2 += "01234567" [(c >> 0) & 0x07]; + } + else if (c == '"' || c == '\\') + { + out2 += '\\'; + out2 += c; + } + else + out2 += c; } out2 += '"'; return out2; |