bugfix: missing support for escape sequences in RainerScript

Only \' was supported. Now the usual set is supported. Note that v5 used \x as escape where x was any character (e.g. "\n" meant "n" and NOT LF). This also means there is some incompatibility to v5 for well-known sequences. Better break it now than later.
author: Rainer Gerhards <rgerhards@adiscon.com> 2012-09-13 09:30:20 +0200
committer: Rainer Gerhards <rgerhards@adiscon.com> 2012-09-13 09:30:20 +0200
commit: 10bef02e8f8f6bec4f1c18d9c634aa6927f4611a (patch)
tree: 778f982d6aa0d1ccda31c8b011418587c04437af
parent: 4f0672f601c74cb60ae32dfa67cccc3336dd674d (diff)
download: rsyslog-10bef02e8f8f6bec4f1c18d9c634aa6927f4611a.tar.gz
rsyslog-10bef02e8f8f6bec4f1c18d9c634aa6927f4611a.tar.xz
rsyslog-10bef02e8f8f6bec4f1c18d9c634aa6927f4611a.zip
5 files changed, 162 insertions, 2 deletions
diff --git a/ChangeLog b/ChangeLog
index fe272c7d..0e367f9c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -2,6 +2,11 @@
 Version 6.4.2  [V6-STABLE] 2012-09-??
 - bugfix: remove invalid socket option call from imuxsock
   Thanks to Cristian Ionescu-Idbohrn and Jonny Törnbom 
+- bugfix: missing support for escape sequences in RainerScript
+  only \' was supported. Now the usual set is supported. Note that v5
+  used \x as escape where x was any character (e.g. "\n" meant "n" and NOT
+  LF). This also means there is some incompatibility to v5 for well-known
+  sequences. Better break it now than later.
 ---------------------------------------------------------------------------
 Version 6.4.1  [V6-STABLE] 2012-09-06
 - bugfix: multiple main queues with same queue file name were not detected
diff --git a/doc/v6compatibility.html b/doc/v6compatibility.html
index 1f830854..c1799974 100644
--- a/doc/v6compatibility.html
+++ b/doc/v6compatibility.html
@@ -162,6 +162,15 @@ As you see, here you may include spaces between user names.
 so it is a wise decision to change config files at least to the legacy
 format (with ":omusrmsg:" in front of the name).
 
+<h2>Escape Sequences in Script-Based Filters</h2>
+<p>In v5, escape sequences were very simplistic. Inside a string, "\x" meant
+"x" with x being any character. This has been changed so that the usual set of
+escapes is supported, most importantly "\n", "\t", "\xhh" (with hh being hex digits)
+and "\ooo" (with o being octal digits). So if one of these sequences was used
+previously, results are obviously different. However, that should not create any
+real problems, because it is hard to envision why someone should have done that
+(why write "\n" when you can also write "n"?).
+---------------------------------------------------------------------------
 <p>[<a href="manual.html">manual index</a>] [<a href="http://www.rsyslog.com/">rsyslog site</a>]</p>
 <p><font size="2">This documentation is part of the
 <a href="http://www.rsyslog.com/">rsyslog</a> project.<br>
diff --git a/grammar/lexer.l b/grammar/lexer.l
index e688ffce..289d54ff 100644
--- a/grammar/lexer.l
+++ b/grammar/lexer.l
@@ -122,9 +122,15 @@ int fileno(FILE *stream);
 <EXPR>0x[0-7a-f] |		/* hex number, following rule is dec; strtoll handles all! */
 <EXPR>([1-9][0-9]*|0)		{ yylval.n = strtoll(yytext, NULL, 0); return NUMBER; }
 <EXPR>\$[$!]{0,1}[a-z][a-z0-9\-_\.]*	{ yylval.s = strdup(yytext); return VAR; }
-<EXPR>\'([^'\\]|\\['])*\'	 { yylval.estr = es_newStrFromBuf(yytext+1, yyleng-2);
+<EXPR>\'([^'\\]|\\['"\\$bntr]|\\x[0-9a-f][0-9a-f]|\\[0-7][0-7][0-7])*\'	 {
+				   /* single-quoted string: cut off the closing quote,
+				    * unescape in place, then copy the result. yytext
+				    * still starts with the opening quote, so the text
+				    * is one byte shorter than strlen(yytext).
+				    */
+				   yytext[yyleng-1] = '\0';
+				   unescapeStr((uchar*)yytext+1, yyleng-2);
+				   yylval.estr = es_newStrFromBuf(yytext+1, strlen(yytext)-1);
 				   return STRING; }
-<EXPR>\"([^"\\]|\\["])*\"	 { yylval.estr = es_newStrFromBuf(yytext+1, yyleng-2);
+<EXPR>\"([^"\\$]|\\["'\\$bntr]|\\x[0-9a-f][0-9a-f]|\\[0-7][0-7][0-7])*\" {
+				   /* double-quoted string: cut off the closing quote,
+				    * unescape in place, then copy the result.
+				    * Unescaping may shrink the text, so its length is
+				    * measured again instead of reusing yyleng-2; yytext
+				    * still starts with the opening quote, hence the -1.
+				    */
+				   yytext[yyleng-1] = '\0';
+				   unescapeStr((uchar*)yytext+1, yyleng-2);
+				   yylval.estr = es_newStrFromBuf(yytext+1, strlen(yytext)-1);
 				   return STRING; }
 <EXPR>[ \t\n]
 <EXPR>[a-z][a-z0-9_]*		{ yylval.estr = es_newStrFromCStr(yytext, yyleng);
diff --git a/grammar/rainerscript.c b/grammar/rainerscript.c
index a5cc10c2..de63f692 100644
--- a/grammar/rainerscript.c
+++ b/grammar/rainerscript.c
@@ -1616,3 +1616,142 @@ cstrPrint(char *text, es_str_t *estr)
 	dbgprintf("%s%s", text, str);
 	free(str);
 }
+
+
+/* Check whether c is an octal digit, that is one of '0'..'7'.
+ * Returns 1 if it is, 0 otherwise.
+ */
+static inline int
+isodigit(uchar c)
+{
+	if(c < '0' || c > '7')
+		return 0;
+	return 1;
+}
+
+/**
+ * Get numerical value of a hex digit. This is a helper function.
+ * @param[in] c a character out of 0..9, A..F, a..f. Anything else
+ * is an error that is NOT detected here, so the caller must validate
+ * the character first.
+ * @return the value of the digit (0..15 for valid input)
+ */
+static inline int
+hexDigitVal(char c)
+{
+	if(c >= 'a')
+		return c - 'a' + 10;
+	if(c >= 'A')
+		return c - 'A' + 10;
+	return c - '0';
+}
+
+/* Handle the actual unescaping of one character or one escape sequence.
+ * a helper to unescapeStr(), to help make the function easier to read.
+ * The buffer is rewritten in place.
+ * @param[in,out] c    buffer holding the string
+ * @param[in]     len  number of bytes of c that belong to the input
+ * @param[in,out] iSrc index of the byte to process. If an escape sequence
+ *                     starts there, the index is left on the last byte
+ *                     that was consumed, so the caller advances by one
+ *                     in every case.
+ * @param[in]     iDst index in c that receives the resulting byte
+ */
+static inline void
+doUnescape(unsigned char *c, int len, int *iSrc, int iDst)
+{
+	if(c[*iSrc] != '\\') {
+		/* regular character */
+		c[iDst] = c[*iSrc];
+		return;
+	}
+	if((*iSrc)+1 >= len) {
+		/* error, incomplete escape (backslash is the last character),
+		 * treat as single char. We must not look at the byte behind
+		 * the end of the input.
+		 */
+		c[iDst] = '\\';
+		return;
+	}
+	/* regular case, unescape */
+	++(*iSrc);
+	switch(c[*iSrc]) {
+	case 'a':
+		c[iDst] = '\007';
+		break;
+	case 'b':
+		c[iDst] = '\b';
+		break;
+	case 'f':
+		c[iDst] = '\014';
+		break;
+	case 'n':
+		c[iDst] = '\n';
+		break;
+	case 'r':
+		c[iDst] = '\r';
+		break;
+	case 't':
+		c[iDst] = '\t';
+		break;
+	case '\'':
+		c[iDst] = '\'';
+		break;
+	case '"':
+		c[iDst] = '"';
+		break;
+	case '?':
+		c[iDst] = '?';
+		break;
+	case '$':
+		c[iDst] = '$';
+		break;
+	case '\\':
+		c[iDst] = '\\';
+		break;
+	case 'x':
+		if(    (*iSrc)+2 >= len
+		   || !isxdigit(c[(*iSrc)+1])
+		   || !isxdigit(c[(*iSrc)+2])) {
+			/* error, incomplete escape, use as is: emit the
+			 * backslash and step back onto it, so that the caller
+			 * copies the 'x' and what follows as regular chars.
+			 */
+			c[iDst] = '\\';
+			--(*iSrc);
+			break;
+		}
+		c[iDst] = (hexDigitVal(c[(*iSrc)+1]) << 4) +
+			  hexDigitVal(c[(*iSrc)+2]);
+		*iSrc += 2;
+		break;
+	case '0': /* octal escape */
+	case '1':
+	case '2':
+	case '3':
+	case '4':
+	case '5':
+	case '6':
+	case '7':
+		if(    (*iSrc)+2 >= len
+		   || !isodigit(c[(*iSrc)+1])
+		   || !isodigit(c[(*iSrc)+2])) {
+			/* error, incomplete escape, use as is (same
+			 * handling as for an incomplete hex escape)
+			 */
+			c[iDst] = '\\';
+			--(*iSrc);
+			break;
+		}
+		/* note: the result is stored into an unsigned char, so
+		 * values above \377 are reduced modulo 256.
+		 */
+		c[iDst] = ((c[(*iSrc)  ] - '0') << 6) +
+		          ((c[(*iSrc)+1] - '0') << 3) +
+		          ( c[(*iSrc)+2] - '0');
+		*iSrc += 2;
+		break;
+	default:
+		/* error, unknown escape, indicate by '?' */
+		c[iDst] = '?';
+		break;
+	}
+}
+
+/* Replace the escape sequences inside a string by the characters they
+ * stand for. This is done in place; the result is never longer than the
+ * input and is always NUL-terminated.
+ * @param[in,out] s   string to process, must not be NULL. s[len] must be
+ *                    writable, because the terminating NUL is stored
+ *                    there if the string contains no escape sequence.
+ * @param[in]     len number of bytes of s to process
+ */
+void
+unescapeStr(uchar *s, int len)
+{
+	int iSrc, iDst;
+	assert(s != NULL);
+
+	/* scan for first escape sequence (if we are lucky, there is none!) */
+	iSrc = 0;
+	while(iSrc < len && s[iSrc] != '\\')
+		++iSrc;
+	/* now we have a sequence or end of string. Everything in front of
+	 * iSrc is already in its final place, so output continues right
+	 * there. Setting iDst unconditionally also keeps it well-defined
+	 * if no escape sequence was found. In any case, we process all
+	 * remaining characters (maybe 0!) and unescape.
+	 */
+	iDst = iSrc;
+	while(iSrc < len) {
+		doUnescape(s, len, &iSrc, iDst);
+		++iSrc;
+		++iDst;
+	}
+	s[iDst] = '\0';
+}
diff --git a/grammar/rainerscript.h b/grammar/rainerscript.h
index e11ae62f..a52b3fa8 100644
--- a/grammar/rainerscript.h
+++ b/grammar/rainerscript.h
@@ -249,6 +249,7 @@ void cnfparamsPrint(struct cnfparamblk *params, struct cnfparamvals *vals);
 void varDelete(struct var *v);
 void cnfparamvalsDestruct(struct cnfparamvals *paramvals, struct cnfparamblk *blk);
 void cnfcfsyslinelstDestruct(struct cnfcfsyslinelst *cfslst);
+/* unescape the backslash escape sequences in s (len bytes), in place */
+void unescapeStr(uchar *s, int len);
 
 /* debug helper */
 void cstrPrint(char *text, es_str_t *estr);
author	Rainer Gerhards <rgerhards@adiscon.com>	2012-09-13 09:30:20 +0200
committer	Rainer Gerhards <rgerhards@adiscon.com>	2012-09-13 09:30:20 +0200
commit	10bef02e8f8f6bec4f1c18d9c634aa6927f4611a (patch)
tree	778f982d6aa0d1ccda31c8b011418587c04437af
parent	4f0672f601c74cb60ae32dfa67cccc3336dd674d (diff)
download	rsyslog-10bef02e8f8f6bec4f1c18d9c634aa6927f4611a.tar.gz rsyslog-10bef02e8f8f6bec4f1c18d9c634aa6927f4611a.tar.xz rsyslog-10bef02e8f8f6bec4f1c18d9c634aa6927f4611a.zip