From 7a146af86f153a14b525333df795b78e01b63b4a Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Tue, 19 Feb 2008 17:38:37 +0000 Subject: - begun implementation of expression parsing logic - implemented, simpstr, var, number in tokenizer --- conf.c | 4 + ctok.c | 252 ++++++++++++++++++++++++++++++++++++---------------- ctok.h | 50 ++++++----- doc/expression.html | 2 +- expr.c | 156 +++++++++++++++++++++++++++++++- obj-types.h | 2 +- rsyslog.h | 3 + 7 files changed, 364 insertions(+), 105 deletions(-) diff --git a/conf.c b/conf.c index c8fa9ab6..6e03e5b8 100644 --- a/conf.c +++ b/conf.c @@ -766,6 +766,10 @@ dbgprintf("calling expression parser, pp %p ('%s')\n", *pline, *pline); dbgprintf("end expression parser, pp %p ('%s')\n", *pline, *pline); finalize_it: + if(iRet == RS_RET_SYNTAX_ERROR) { + logerror("syntax error in expression"); + } + RETiRet; } diff --git a/ctok.c b/ctok.c index b6301ff6..ed413d6d 100644 --- a/ctok.c +++ b/ctok.c @@ -132,6 +132,7 @@ finalize_it: } +#if 0 /* get the next word from the input "stream" (currently just a in-memory * string...). A word is anything between whitespace. If the word is longer * than the provided memory buffer, parsing terminates when buffer length @@ -162,75 +163,162 @@ dbgprintf("end ctokGetWorkFromStream, stream now '%s'\n", pThis->pp); finalize_it: RETiRet; } +#endif -#if 0 -/* Get the next token from the input stream. This parses the next token and - * ignores any whitespace in between. End of stream is communicated via iRet. 
+/* read in a constant number + * This is the "number" ABNF element * rgerhards, 2008-02-19 */ -rsRetVal -ctokGetNextToken(ctok_t *pThis, ctok_token_t *pToken) +static rsRetVal +ctokGetNumber(ctok_t *pThis, ctok_token_t *pToken) { DEFiRet; - uchar pszWord[128]; + int64 n; /* the parsed number */ + uchar c; + int valC; + int iBase; ISOBJ_TYPE_assert(pThis, ctok); ASSERT(pToken != NULL); - CHKiRet(ctokGetWordFromStream(pThis, pszWord, sizeof(pszWord)/sizeof(uchar))); - - /* now recognize words... */ - if(strcasecmp((char*)pszWord, "or")) { - *pToken = ctok_OR; - } else if(strcasecmp((char*)pszWord, "and")) { - *pToken = ctok_AND; - } else if(strcasecmp((char*)pszWord, "+")) { - *pToken = ctok_PLUS; - } else if(strcasecmp((char*)pszWord, "-")) { - *pToken = ctok_MINUS; - } else if(strcasecmp((char*)pszWord, "*")) { - *pToken = ctok_TIMES; - } else if(strcasecmp((char*)pszWord, "/")) { - *pToken = ctok_DIV; - } else if(strcasecmp((char*)pszWord, "%")) { - *pToken = ctok_MOD; - } else if(strcasecmp((char*)pszWord, "not")) { - *pToken = ctok_NOT; - } else if(strcasecmp((char*)pszWord, "(")) { - *pToken = ctok_LPAREN; - } else if(strcasecmp((char*)pszWord, ")")) { - *pToken = ctok_RPAREN; - } else if(strcasecmp((char*)pszWord, ",")) { - *pToken = ctok_COMMA; - } else if(strcasecmp((char*)pszWord, "$")) { - *pToken = ctok_DOLLAR; - } else if(strcasecmp((char*)pszWord, "'")) { - *pToken = ctok_QUOTE; - } else if(strcasecmp((char*)pszWord, "\"")) { - *pToken = ctok_DBL_QUOTE; - } else if(strcasecmp((char*)pszWord, "==")) { - *pToken = ctok_CMP_EQ; - } else if(strcasecmp((char*)pszWord, "!=")) { - *pToken = ctok_CMP_NEQ; - } else if(strcasecmp((char*)pszWord, "<>")) { /* an alias for the non-C folks... 
*/ - *pToken = ctok_CMP_NEQ; - } else if(strcasecmp((char*)pszWord, "<")) { - *pToken = ctok_CMP_LT; - } else if(strcasecmp((char*)pszWord, ">")) { - *pToken = ctok_CMP_GT; - } else if(strcasecmp((char*)pszWord, "<=")) { - *pToken = ctok_CMP_LTEQ; - } else if(strcasecmp((char*)pszWord, ">=")) { - *pToken = ctok_CMP_GTEQ; + pToken->tok = ctok_NUMBER; + + CHKiRet(ctokGetCharFromStream(pThis, &c)); + if(c == '0') { /* octal? */ + CHKiRet(ctokGetCharFromStream(pThis, &c)); + if(c == 'x') { /* nope, hex! */ + CHKiRet(ctokGetCharFromStream(pThis, &c)); + c = tolower(c); + iBase = 16; + } else { + iBase = 8; + } + } else { + iBase = 10; } + + n = 0; + /* this loop is quite simple, a variable name is terminated by whitespace. */ + while(isdigit(c) || (c >= 'a' && c <= 'f')) { + if(isdigit(c)) { + valC = c - '0'; + } else { + valC = c - 'a' + 10; + } + + if(valC >= iBase) { + if(iBase == 8) { + ABORT_FINALIZE(RS_RET_INVALID_OCTAL_DIGIT); + } else { + ABORT_FINALIZE(RS_RET_INVALID_HEX_DIGIT); + } + } + /* we now have the next value and know it is right */ + n = n * iBase + valC; + CHKiRet(ctokGetCharFromStream(pThis, &c)); + c = tolower(c); + } + pToken->intVal = n; -RUNLOG_VAR("%d", *pToken); +dbgprintf("number, number is: '%lld'\n", pToken->intVal); finalize_it: RETiRet; } -#endif + + +/* read in a variable + * This covers both msgvar and sysvar from the ABNF. + * rgerhards, 2008-02-19 + */ +static rsRetVal +ctokGetVar(ctok_t *pThis, ctok_token_t *pToken) +{ + DEFiRet; + uchar c; + + ISOBJ_TYPE_assert(pThis, ctok); + ASSERT(pToken != NULL); + + CHKiRet(ctokGetCharFromStream(pThis, &c)); + + if(c == '$') { /* second dollar, we have a system variable */ + pToken->tok = ctok_SYSVAR; + CHKiRet(ctokGetCharFromStream(pThis, &c)); /* "eat" it... */ + } else { + pToken->tok = ctok_MSGVAR; + } + + CHKiRet(rsCStrConstruct(&pToken->pstrVal)); + /* this loop is quite simple, a variable name is terminated by whitespace. 
*/ + while(!isspace(c)) { + CHKiRet(rsCStrAppendChar(pToken->pstrVal, tolower(c))); + CHKiRet(ctokGetCharFromStream(pThis, &c)); + } + CHKiRet(rsCStrFinish(pStrB)); + +dbgprintf("var, var is: '%s'\n", rsCStrGetSzStr(pToken->pstrVal)); + +finalize_it: + if(iRet != RS_RET_OK) { + if(pToken->pstrVal != NULL) { + rsCStrDestruct(pToken->pstrVal); + pToken->pstrVal = NULL; + } + } + + RETiRet; +} + + +/* read in a simple string (simpstr in ABNF) + * rgerhards, 2008-02-19 + */ +static rsRetVal +ctokGetSimpStr(ctok_t *pThis, ctok_token_t *pToken) +{ + DEFiRet; + uchar c; + int bInEsc = 0; + + ISOBJ_TYPE_assert(pThis, ctok); + ASSERT(pToken != NULL); + + pToken->tok = ctok_SIMPSTR; + + CHKiRet(rsCStrConstruct(&pToken->pstrVal)); + CHKiRet(ctokGetCharFromStream(pThis, &c)); + /* while we are in escape mode (had a backslash), no sequence + * terminates the loop. If outside, it is terminated by a single quote. + */ + while(bInEsc || c != '\'') { + if(bInEsc) { + CHKiRet(rsCStrAppendChar(pToken->pstrVal, c)); + bInEsc = 0; + } else { + if(c == '\\') { + bInEsc = 1; + } else { + CHKiRet(rsCStrAppendChar(pToken->pstrVal, c)); + } + } + CHKiRet(ctokGetCharFromStream(pThis, &c)); + } + CHKiRet(rsCStrFinish(pStrB)); + +dbgprintf("simpstr, str is: '%s'\n", rsCStrGetSzStr(pToken->pstrVal)); + +finalize_it: + if(iRet != RS_RET_OK) { + if(pToken->pstrVal != NULL) { + rsCStrDestruct(pToken->pstrVal); + pToken->pstrVal = NULL; + } + } + + RETiRet; +} /* Get the next token from the input stream. This parses the next token and @@ -252,91 +340,97 @@ ctokGetNextToken(ctok_t *pThis, ctok_token_t *pToken) switch(c) { case 'o':/* or */ CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a charater */ - *pToken = (c == 'r')? ctok_OR : ctok_INVALID; + pToken->tok = (c == 'r')? ctok_OR : ctok_INVALID; break; case 'a': /* and */ CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a charater */ if(c == 'n') { CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a charater */ - *pToken = (c == 'd')? 
ctok_AND : ctok_INVALID; + pToken->tok = (c == 'd')? ctok_AND : ctok_INVALID; } else { - *pToken = ctok_INVALID; + pToken->tok = ctok_INVALID; } break; case 'n': /* not */ CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a charater */ if(c == 'o') { CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a charater */ - *pToken = (c == 't')? ctok_NOT : ctok_INVALID; + pToken->tok = (c == 't')? ctok_NOT : ctok_INVALID; } else { - *pToken = ctok_INVALID; + pToken->tok = ctok_INVALID; } break; case '=': /* == */ CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a charater */ - *pToken = (c == '=')? ctok_CMP_EQ : ctok_INVALID; + pToken->tok = (c == '=')? ctok_CMP_EQ : ctok_INVALID; break; case '!': /* != */ CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a charater */ - *pToken = (c == '=')? ctok_CMP_NEQ : ctok_INVALID; + pToken->tok = (c == '=')? ctok_CMP_NEQ : ctok_INVALID; break; case '<': /* <, <=, <> */ CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a charater */ if(c == '=') { - *pToken = ctok_CMP_LTEQ; + pToken->tok = ctok_CMP_LTEQ; } else if(c == '>') { - *pToken = ctok_CMP_NEQ; + pToken->tok = ctok_CMP_NEQ; } else { - *pToken = ctok_CMP_LT; + pToken->tok = ctok_CMP_LT; } break; case '>': /* >, >= */ CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a charater */ if(c == '=') { - *pToken = ctok_CMP_GTEQ; + pToken->tok = ctok_CMP_GTEQ; } else { - *pToken = ctok_CMP_GT; + pToken->tok = ctok_CMP_GT; } break; case '+': - *pToken = ctok_PLUS; + pToken->tok = ctok_PLUS; break; case '-': - *pToken = ctok_MINUS; + pToken->tok = ctok_MINUS; break; case '*': - *pToken = ctok_TIMES; + pToken->tok = ctok_TIMES; break; case '/': - *pToken = ctok_DIV; + pToken->tok = ctok_DIV; break; case '%': - *pToken = ctok_MOD; + pToken->tok = ctok_MOD; break; case '(': - *pToken = ctok_LPAREN; + pToken->tok = ctok_LPAREN; break; case ')': - *pToken = ctok_RPAREN; + pToken->tok = ctok_RPAREN; break; case ',': - *pToken = ctok_COMMA; + pToken->tok = ctok_COMMA; break; case '$': - 
*pToken = ctok_DOLLAR; + CHKiRet(ctokGetVar(pThis, pToken)); break; - case '\'': - *pToken = ctok_QUOTE; + case '\'': /* simple string, this is somewhat more elaborate */ + CHKiRet(ctokGetSimpStr(pThis, pToken)); break; case '"': - *pToken = ctok_DBL_QUOTE; + /* TODO: template string parser */ + ABORT_FINALIZE(RS_RET_NOT_IMPLEMENTED); break; default: - *pToken = ctok_INVALID; + if(isdigit(c)) { + CHKiRet(ctokUngetCharFromStream(pThis, c)); /* push back, we need this digit */ + CHKiRet(ctokGetNumber(pThis, pToken)); + } else { + pToken->tok = ctok_INVALID; + } break; } -RUNLOG_VAR("%d", *pToken); +RUNLOG_VAR("%d", pToken->tok); finalize_it: RETiRet; diff --git a/ctok.h b/ctok.h index b02a83c2..56133e4e 100644 --- a/ctok.h +++ b/ctok.h @@ -27,28 +27,34 @@ /* the tokens... I use numbers below so that the tokens can be easier * identified in debug output. */ -typedef enum { - ctok_INVALID = 0, - ctok_OR = 1, - ctok_AND = 2, - ctok_PLUS = 3, - ctok_MINUS = 4, - ctok_TIMES = 5, /* "*" */ - ctok_DIV = 6, - ctok_MOD = 7, - ctok_NOT = 8, - ctok_RPAREN = 9, - ctok_LPAREN = 10, - ctok_COMMA = 11, - ctok_DOLLAR = 12, - ctok_QUOTE = 13, - ctok_DBL_QUOTE = 14, - ctok_CMP_EQ = 15, - ctok_CMP_NEQ = 16, - ctok_CMP_LT = 17, - ctok_CMP_GT = 18, - ctok_CMP_LTEQ = 19, - ctok_CMP_GTEQ = 20, +typedef struct { + enum { + ctok_INVALID = 0, + ctok_OR = 1, + ctok_AND = 2, + ctok_PLUS = 3, + ctok_MINUS = 4, + ctok_TIMES = 5, /* "*" */ + ctok_DIV = 6, + ctok_MOD = 7, + ctok_NOT = 8, + ctok_RPAREN = 9, + ctok_LPAREN = 10, + ctok_COMMA = 11, + ctok_SYSVAR = 12, + ctok_MSGVAR = 13, + ctok_SIMPSTR = 14, + ctok_TPLSTR = 15, + ctok_CMP_EQ = 16, + ctok_CMP_NEQ = 17, + ctok_CMP_LT = 18, + ctok_CMP_GT = 19, + ctok_CMP_LTEQ = 20, + ctok_CMP_GTEQ = 21, + ctok_NUMBER = 22 + } tok; + rsCStrObj *pstrVal; + int64 intVal; } ctok_token_t; /* the ctokession object */ diff --git a/doc/expression.html b/doc/expression.html index 5fda4915..7ce9429a 100644 --- a/doc/expression.html +++ b/doc/expression.html @@ 
-11,7 +11,7 @@ far, they are supported for filtering messages.

Formal Definition

Below is the formal definition of expression format (in ABNF, RFC 2234):
-

expr     := e_and *("or" e_and)
e_and := e_cmp *("and" e_cmp)
e_cmp := val cmp_op val
val := ["+" / "-"] term *(("+" / "-") term)
term := factor *(("*" / "/" / "%") factor)
factor := ["not"] terminal
terminal := var / constant / function / "(" expr ")"
function := name "(" *("," expr) ")"
var := "$" varname
varname := msgvar / sysvar
msgvar := name
sysvar := "$" name
name := alpha *(alnum)
constant := string / number
string := simpstr / tplstr ; tplstr will be implemented in next phase
simpstr := "'" *char "'" ; use your imagination for char ;)
tplstr := '"' template '"' ; not initially implemented
number := 1*digit
cmp_op := "==" / "!=" / "<>" / "<" / ">" / "<=" / ">="
digit := %x30-39
alpha := "a" ... "z" # all letters
alnum :* alpha / digit / "_"
+

expr     := e_and *("or" e_and)
e_and := e_cmp *("and" e_cmp)
e_cmp := val cmp_op val
val := ["+" / "-"] term *(("+" / "-") term)
term := factor *(("*" / "/" / "%") factor)
factor := ["not"] terminal
terminal := var / constant / function / "(" expr ")"
function := name "(" *("," expr) ")"
var := "$" varname
varname := msgvar / sysvar
msgvar := name
sysvar := "$" name
name := alpha *(alnum)
constant := string / number
string := simpstr / tplstr ; tplstr will be implemented in next phase
simpstr := "'" *char "'" ; use your imagination for char ;)
tplstr := '"' template '"' ; not initially implemented
number := 1*digit ; 0nn = octal, 0xnn = hex, nn = decimal
cmp_op := "==" / "!=" / "<>" / "<" / ">" / "<=" / ">="
digit := %x30-39
alpha := "a" ... "z" ; all letters
alnum := alpha / digit / "_"

[rsyslog.conf overview] [manual index] [rsyslog site]

This documentation is part of the diff --git a/expr.c b/expr.c index 114f9587..5cb51c5f 100644 --- a/expr.c +++ b/expr.c @@ -38,6 +38,159 @@ DEFobjStaticHelpers +/* ------------------------------ parser functions ------------------------------ */ +/* the following functions implement the parser. They are all static. For + * simplicity, the function names match their ABNF definition. The ABNF is defined + * in the doc set. See file expression.html for details. I do *not* reproduce it + * here in an effort to keep both files in sync. + * + * All functions receive the current expression object as parameter as well as the + * current tokenizer. + * + * rgerhards, 2008-02-19 + */ + +#if 0 +static rsRetVal +template(expr_t *pThis, ctok_t *ctok) +{ + DEFiRet; +RUNLOG_STR(""); + + ISOBJ_TYPE_assert(pThis, expr); + ISOBJ_TYPE_assert(ctok, ctok); + + +finalize_it: + RETiRet; +} +#endif + + + +static rsRetVal +terminal(expr_t *pThis, ctok_t *ctok) +{ + DEFiRet; + ctok_token_t token; +RUNLOG_STR("terminal"); + + ISOBJ_TYPE_assert(pThis, expr); + ISOBJ_TYPE_assert(ctok, ctok); + + CHKiRet(ctokGetNextToken(ctok, &token)); + + switch(token.tok) { + case ctok_SIMPSTR: + //CHKiRet(simpstr(pThis, ctok)); + break; + default: + ABORT_FINALIZE(RS_RET_SYNTAX_ERROR); + break; + } + +finalize_it: + RETiRet; +} + +static rsRetVal +factor(expr_t *pThis, ctok_t *ctok) +{ + DEFiRet; +RUNLOG_STR("factor"); + + ISOBJ_TYPE_assert(pThis, expr); + ISOBJ_TYPE_assert(ctok, ctok); + + CHKiRet(terminal(pThis, ctok)); + +finalize_it: + RETiRet; +} + + +static rsRetVal +term(expr_t *pThis, ctok_t *ctok) +{ + DEFiRet; +RUNLOG_STR("term"); + + ISOBJ_TYPE_assert(pThis, expr); + ISOBJ_TYPE_assert(ctok, ctok); + + CHKiRet(factor(pThis, ctok)); + +finalize_it: + RETiRet; +} + +static rsRetVal +val(expr_t *pThis, ctok_t *ctok) +{ + DEFiRet; +RUNLOG_STR("val"); + + ISOBJ_TYPE_assert(pThis, expr); + ISOBJ_TYPE_assert(ctok, ctok); + + CHKiRet(term(pThis, ctok)); + +finalize_it: + RETiRet; +} + + +static rsRetVal 
+e_cmp(expr_t *pThis, ctok_t *ctok) +{ + DEFiRet; +RUNLOG_STR("e_cmp"); + + ISOBJ_TYPE_assert(pThis, expr); + ISOBJ_TYPE_assert(ctok, ctok); + + CHKiRet(val(pThis, ctok)); + +finalize_it: + RETiRet; +} + + +static rsRetVal +e_and(expr_t *pThis, ctok_t *ctok) +{ + DEFiRet; +RUNLOG_STR("e_and"); + + ISOBJ_TYPE_assert(pThis, expr); + ISOBJ_TYPE_assert(ctok, ctok); + + CHKiRet(e_cmp(pThis, ctok)); + +finalize_it: + RETiRet; +} + + +static rsRetVal +expr(expr_t *pThis, ctok_t *ctok) +{ + DEFiRet; +RUNLOG_STR("expr"); + + ISOBJ_TYPE_assert(pThis, expr); + ISOBJ_TYPE_assert(ctok, ctok); + + CHKiRet(e_and(pThis, ctok)); + +finalize_it: + RETiRet; +} + + +/* ------------------------------ end parser functions ------------------------------ */ + + /* ------------------------------ actual expr object functions ------------------------------ */ /* Standard-Constructor @@ -112,12 +265,11 @@ rsRetVal exprParse(expr_t *pThis, ctok_t *ctok) { DEFiRet; - ctok_token_t token; ISOBJ_TYPE_assert(pThis, expr); ISOBJ_TYPE_assert(ctok, ctok); - CHKiRet(ctokGetNextToken(ctok, &token)); + CHKiRet(expr(pThis, ctok)); RUNLOG_STR("expr parser being called"); finalize_it: diff --git a/obj-types.h b/obj-types.h index e301b7bc..f63eb49e 100644 --- a/obj-types.h +++ b/obj-types.h @@ -175,7 +175,7 @@ finalize_it: \ * rgerhards, 2008-01-10 */ #define BEGINobjConstruct(obj) \ - rsRetVal obj##Initialize(obj##_t *pThis) \ + rsRetVal obj##Initialize(obj##_t __attribute__((unused)) *pThis) \ { \ DEFiRet; diff --git a/rsyslog.h b/rsyslog.h index 67e61e70..655cf5d9 100644 --- a/rsyslog.h +++ b/rsyslog.h @@ -124,6 +124,9 @@ enum rsRetVal_ /** return value. All methods return this if not specified oth RS_RET_NO_DRIVERS = -2048, /**< a required drivers missing */ RS_RET_NO_DRIVERNAME = -2049, /**< driver name missing where one was required */ RS_RET_EOS = -2050, /**< end of stream (of whatever) */ + RS_RET_SYNTAX_ERROR = -2051, /**< syntax error, eg. 
during parsing */ + RS_RET_INVALID_OCTAL_DIGIT = -2052, /**< invalid octal digit during parsing */ + RS_RET_INVALID_HEX_DIGIT = -2053, /**< invalid hex digit during parsing */ RS_RET_OK_DELETE_LISTENTRY = 1, /**< operation successful, but callee requested the deletion of an entry (special state) */ RS_RET_TERMINATE_NOW = 2, /**< operation successful, function is requested to terminate (mostly used with threads) */ RS_RET_NO_RUN = 3, /**< operation successful, but function does not like to be executed */ -- cgit