From 71003f146cc2dacfa8fc7c084404f3399812b64a Mon Sep 17 00:00:00 2001
From: Rainer Gerhards
Date: Mon, 4 Jul 2011 08:34:13 +0200
Subject: milestone: added operations to expr, added evaluation

---
 grammar/mini.samp |   2 +-
 grammar/rscript.l |  19 +++---
 grammar/rscript.y |  26 +++++++-
 grammar/utils.c   | 187 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 grammar/utils.h   |  19 ++++--
 5 files changed, 233 insertions(+), 20 deletions(-)

diff --git a/grammar/mini.samp b/grammar/mini.samp
index 9e00b7cf..8e00917f 100644
--- a/grammar/mini.samp
+++ b/grammar/mini.samp
@@ -24,7 +24,7 @@ if 1 then {
 	/var/log/log3
 	@@fwd rger
 }
-if 2*4/-5--(10-3)/*pri("*.*")*/ then {
+if not 1==0 and 2*4/-5--(10-3)>7/*pri("*.*")*/ then {
 	action(type="omfile" taget="/var/log/log5")
 	action(type="omfile" taget="/var/log/log6")
 	action(type="omfwd" taget="10.0.0.1" port="514")
diff --git a/grammar/rscript.l b/grammar/rscript.l
index f7e51e25..596becaf 100644
--- a/grammar/rscript.l
+++ b/grammar/rscript.l
@@ -55,21 +55,20 @@ static int preCommentState;
 "-" |
 "(" |
 ")" { return yytext[0]; }
-"==" { printf("==\n"); }
-"<=" { printf("<=\n"); }
-">=" { printf(">=\n"); }
+"==" { return CMP_EQ; }
+"<=" { return CMP_LE; }
+">=" { return CMP_GE; }
 "!=" |
-"<>" { printf("!=\n"); }
-"<" { printf("<\n"); }
-">" { printf(">\n"); }
+"<>" { return CMP_NE; }
+"<" { return CMP_LT; }
+">" { return CMP_GT; }
 "contains" { printf("CONTAINS\n"); }
 "contains_i" { printf("CONTAINS_I\n"); }
 "startswith" { printf("STARTSWITH\n"); }
 "startswith_i" { printf("STARTSWITH_I\n"); }
-0[0-7]+ { printf("NUMBER (oct) %s\n", yytext); }
-0x[0-7a-f] { printf("NUMBER (hex) %s\n", yytext); }
-([1-9][0-9]*|0) { printf("NUMBER (dec) %s\n", yytext);
-		  yylval.n = atoll(yytext); return NUMBER; }
+0[0-7]+ | /* octal number */
+0x[0-9a-f]+ | /* hex number, following rule is dec; strtoll handles all! */
+([1-9][0-9]*|0) { yylval.n = strtoll(yytext, NULL, 0); return NUMBER; }
 \$[$!]{0,1}[a-z][a-z0-9\-_\.]* { printf("VARNAME: '%s'\n", yytext); return VAR; }
 \'([^'\\]|\\['])*\' { printf("EXPR string: -%s-\n", yytext); return STRING; }
 [ \t\n]
diff --git a/grammar/rscript.y b/grammar/rscript.y
index 3b680990..3652eec2 100644
--- a/grammar/rscript.y
+++ b/grammar/rscript.y
@@ -34,6 +34,12 @@
 %token VAR
 %token STRING
 %token NUMBER
+%token CMP_EQ
+%token CMP_NE
+%token CMP_LE
+%token CMP_GE
+%token CMP_LT
+%token CMP_GT
 
 %type nv nvlst
 %type obj
@@ -44,12 +50,13 @@
 %type expr
 
 %left AND OR
+%left CMP_EQ CMP_NE CMP_LE CMP_GE CMP_LT CMP_GT
 %left '+' '-'
 %left '*' '/' '%'
 %nonassoc UMINUS NOT
 
 %expect 3
-/* two shift/reduce conflicts are created by the CFSYSLINE construct, which we
+/* these shift/reduce conflicts are created by the CFSYSLINE construct, which we
  * unfortunately can not avoid. The problem is that CFSYSLINE can occur both in
  * global context as well as within an action. It's not permitted somewhere else,
  * but this is suficient for conflicts. The "dangling else" built-in resolution
@@ -85,7 +92,11 @@ rule: PRIFILT actlst { printf("PRIFILT: %s\n", $1); free($1);
 	| PROPFILT actlst
 	| scriptfilt
 
-scriptfilt: IF expr THEN actlst	{ printf("if filter detected, expr:\n"); cnfexprPrint($2,0); }
+scriptfilt: IF expr THEN actlst	{ printf("if filter detected, expr:\n"); cnfexprPrint($2,0);
+				  struct exprret r;
+				  cnfexprEval($2, &r);
+				  printf("eval result: %lld\n", r.d.n);
+				}
 
 /* note: we can do some limited block-structuring with the v6 engine. In that case,
  * we must not support additonal filters inside the blocks, so they must consist of
@@ -107,7 +118,16 @@ act: BEGIN_ACTION nvlst ENDOBJ { $$ = cnfactlstNew(CNFACT_V2, $2, NULL); }
 	| LEGACY_ACTION { printf("legacy action: '%s'\n", $1);
 			  $$ = cnfactlstNew(CNFACT_LEGACY, NULL, $1); }
 
-expr:	  expr '+' expr { $$ = cnfexprNew('+', $1, $3); }
+expr:	  expr AND expr { $$ = cnfexprNew(AND, $1, $3); }
+	| expr OR expr { $$ = cnfexprNew(OR, $1, $3); }
+	| NOT expr { $$ = cnfexprNew(NOT, NULL, $2); }
+	| expr CMP_EQ expr { $$ = cnfexprNew(CMP_EQ, $1, $3); }
+	| expr CMP_NE expr { $$ = cnfexprNew(CMP_NE, $1, $3); }
+	| expr CMP_LE expr { $$ = cnfexprNew(CMP_LE, $1, $3); }
+	| expr CMP_GE expr { $$ = cnfexprNew(CMP_GE, $1, $3); }
+	| expr CMP_LT expr { $$ = cnfexprNew(CMP_LT, $1, $3); }
+	| expr CMP_GT expr { $$ = cnfexprNew(CMP_GT, $1, $3); }
+	| expr '+' expr { $$ = cnfexprNew('+', $1, $3); }
 	| expr '-' expr { $$ = cnfexprNew('-', $1, $3); }
 	| expr '*' expr { $$ = cnfexprNew('*', $1, $3); }
 	| expr '/' expr { $$ = cnfexprNew('/', $1, $3); }
diff --git a/grammar/utils.c b/grammar/utils.c
index bc300aaf..4087bd81 100644
--- a/grammar/utils.c
+++ b/grammar/utils.c
@@ -4,6 +4,7 @@
 #include
 #include
 #include "utils.h"
+#include "rscript.tab.h"
 
 void
 readConfFile(FILE *fp, es_str_t **str)
@@ -230,7 +231,7 @@ cnfactlstPrint(struct cnfactlst *actlst)
 }
 
 struct cnfexpr*
-cnfexprNew(int nodetype, struct cnfexpr *l, struct cnfexpr *r)
+cnfexprNew(unsigned nodetype, struct cnfexpr *l, struct cnfexpr *r)
 {
 	struct cnfexpr *expr;
 
@@ -250,6 +251,134 @@
 done:
 	return expr;
 }
+/* return the numeric value of an evaluation result; string-to-number
+ * conversion is not implemented yet, so a string result aborts.
+ */
+static inline long long
+exprret2Number(struct exprret *r)
+{
+	if(r->datatype == 'S') {
+		printf("toNumber CONVERSION MISSING\n"); abort();
+	}
+	return r->d.n;
+}
+
+/* return the string value of an evaluation result; number-to-string
+ * conversion is not implemented yet, so a numeric result aborts.
+ */
+static inline es_str_t *
+exprret2String(struct exprret *r)
+{
+	if(r->datatype == 'N') {
+		printf("toString CONVERSION MISSING\n"); abort();
+	}
+	return r->d.estr;
+}
+
+#define COMP_NUM_BINOP(x) \
+	cnfexprEval(expr->l, &l); \
+	cnfexprEval(expr->r, &r); \
+	ret->datatype = 'N'; \
+	ret->d.n = exprret2Number(&l) x exprret2Number(&r)
+
+/* evaluate an expression.
+ * Note that we try to avoid malloc whenever possible (because of
+ * the large overhead it has, especially on highly threaded programs).
+ * As such, each caller level must provide buffer space for the
+ * result on its stack during recursion. This permits the callee to store
+ * the return value without malloc. As the value is a somewhat larger
+ * struct, we could otherwise not return it without malloc.
+ * Note that we implement boolean shortcut operations. For our needs, there
+ * simply is no case where full evaluation would make any sense at all.
+ */
+void
+cnfexprEval(struct cnfexpr *expr, struct exprret *ret)
+{
+	struct exprret r, l; /* memory for subexpression results */
+
+	printf("eval expr %p, type '%c'(%u)\n", expr, expr->nodetype, expr->nodetype);
+	switch(expr->nodetype) {
+	case CMP_EQ:
+		COMP_NUM_BINOP(==);
+		break;
+	case CMP_NE:
+		COMP_NUM_BINOP(!=);
+		break;
+	case CMP_LE:
+		COMP_NUM_BINOP(<=);
+		break;
+	case CMP_GE:
+		COMP_NUM_BINOP(>=);
+		break;
+	case CMP_LT:
+		COMP_NUM_BINOP(<);
+		break;
+	case CMP_GT:
+		COMP_NUM_BINOP(>);
+		break;
+	case OR:
+		cnfexprEval(expr->l, &l);
+		ret->datatype = 'N';
+		if(exprret2Number(&l)) {
+			ret->d.n = 1ll;
+		} else {
+			cnfexprEval(expr->r, &r);
+			if(exprret2Number(&r))
+				ret->d.n = 1ll;
+			else
+				ret->d.n = 0ll;
+		}
+		break;
+	case AND:
+		cnfexprEval(expr->l, &l);
+		ret->datatype = 'N';
+		if(exprret2Number(&l)) {
+			cnfexprEval(expr->r, &r);
+			if(exprret2Number(&r))
+				ret->d.n = 1ll;
+			else
+				ret->d.n = 0ll;
+		} else {
+			ret->d.n = 0ll;
+		}
+		break;
+	case NOT:
+		cnfexprEval(expr->r, &r);
+		ret->datatype = 'N';
+		ret->d.n = !exprret2Number(&r);
+		break;
+	case 'N':
+		ret->datatype = 'N';
+		ret->d.n = ((struct cnfnumval*)expr)->val;
+		break;
+	case '+':
+		COMP_NUM_BINOP(+);
+		break;
+	case '-':
+		COMP_NUM_BINOP(-);
+		break;
+	case '*':
+		COMP_NUM_BINOP(*);
+		break;
+	case '/':
+		COMP_NUM_BINOP(/);
+		break;
+	case '%':
+		COMP_NUM_BINOP(%);
+		break;
+	case 'M':
+		cnfexprEval(expr->r, &r);
+		ret->datatype = 'N';
+		ret->d.n = -exprret2Number(&r);
+		break;
+	default:
+		ret->datatype = 'N';
+		ret->d.n = 0ll;
+		printf("eval error: unknown nodetype %u\n",
+			(unsigned) expr->nodetype);
+		break;
+	}
+}
 
 inline static void
 doIndent(indent)
@@ -263,6 +392,59 @@ cnfexprPrint(struct cnfexpr *expr, int indent)
 {
 	//printf("expr %p, indent %d, type '%c'\n", expr, indent, expr->nodetype);
 	switch(expr->nodetype) {
+	case CMP_EQ:
+		cnfexprPrint(expr->l, indent+1);
+		doIndent(indent);
+		printf("==\n");
+		cnfexprPrint(expr->r, indent+1);
+		break;
+	case CMP_NE:
+		cnfexprPrint(expr->l, indent+1);
+		doIndent(indent);
+		printf("!=\n");
+		cnfexprPrint(expr->r, indent+1);
+		break;
+	case CMP_LE:
+		cnfexprPrint(expr->l, indent+1);
+		doIndent(indent);
+		printf("<=\n");
+		cnfexprPrint(expr->r, indent+1);
+		break;
+	case CMP_GE:
+		cnfexprPrint(expr->l, indent+1);
+		doIndent(indent);
+		printf(">=\n");
+		cnfexprPrint(expr->r, indent+1);
+		break;
+	case CMP_LT:
+		cnfexprPrint(expr->l, indent+1);
+		doIndent(indent);
+		printf("<\n");
+		cnfexprPrint(expr->r, indent+1);
+		break;
+	case CMP_GT:
+		cnfexprPrint(expr->l, indent+1);
+		doIndent(indent);
+		printf(">\n");
+		cnfexprPrint(expr->r, indent+1);
+		break;
+	case OR:
+		cnfexprPrint(expr->l, indent+1);
+		doIndent(indent);
+		printf("OR\n");
+		cnfexprPrint(expr->r, indent+1);
+		break;
+	case AND:
+		cnfexprPrint(expr->l, indent+1);
+		doIndent(indent);
+		printf("AND\n");
+		cnfexprPrint(expr->r, indent+1);
+		break;
+	case NOT:
+		doIndent(indent);
+		printf("NOT\n");
+		cnfexprPrint(expr->r, indent+1);
+		break;
 	case 'N':
 		doIndent(indent);
 		printf("%lld\n", ((struct cnfnumval*)expr)->val);
@@ -280,7 +462,8 @@ cnfexprPrint(struct cnfexpr *expr, int indent)
 		cnfexprPrint(expr->r, indent+1);
 		break;
 	default:
-		printf("error: unknown nodetype\n");
+		printf("error: unknown nodetype %u\n",
+			(unsigned) expr->nodetype);
 		break;
 	}
 }
diff --git a/grammar/utils.h b/grammar/utils.h
index a402d4b2..f52bc1e0 100644
--- a/grammar/utils.h
+++ b/grammar/utils.h
@@ -62,18 +62,18 @@ struct cnfactlst {
  * be the sole foundation for the AST.
  */
 struct cnfexpr {
-	int nodetype;
+	unsigned nodetype;
 	struct cnfexpr *l;
 	struct cnfexpr *r;
 };
 
 struct cnfnumval {
-	int nodetype;
+	unsigned nodetype;
 	long long val;
 };
 
 struct cnfstringval {
-	int nodetype;
+	unsigned nodetype;
 	es_str_t *estr;
 };
 
@@ -83,6 +83,16 @@ struct x {
 };
 */
 
+/* the return value of an expression evaluation */
+struct exprret {
+	union {
+		es_str_t *estr;
+		long long n;
+	} d;
+	char datatype; /* 'N' - number, 'S' - string */
+};
+
+
 void readConfFile(FILE *fp, es_str_t **str);
 struct nvlst* nvlstNew(es_str_t *name, es_str_t *value);
 void nvlstDestruct(struct nvlst *lst);
@@ -95,8 +105,9 @@ void cnfactlstDestruct(struct cnfactlst *actlst);
 void cnfactlstPrint(struct cnfactlst *actlst);
 struct cnfactlst* cnfactlstAddSysline(struct cnfactlst* actlst, char *line);
 struct cnfactlst* cnfactlstReverse(struct cnfactlst *actlst);
-struct cnfexpr* cnfexprNew(int nodetype, struct cnfexpr *l, struct cnfexpr *r);
+struct cnfexpr* cnfexprNew(unsigned nodetype, struct cnfexpr *l, struct cnfexpr *r);
 void cnfexprPrint(struct cnfexpr *expr, int indent);
+void cnfexprEval(struct cnfexpr *expr, struct exprret *ret);
 struct cnfnumval* cnfnumvalNew(long long val);
 struct cnfstringval* cnfstringvalNew(es_str_t *estr);
 
-- 
cgit