summaryrefslogtreecommitdiffstats
path: root/grammar/rscript.l
blob: 750a3e81997c4bf999ea545aed7b9046fd978df0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
 /* lex file for rsyslog config format v2.
  * Please note: this file introduces the new config format, but maintains
  * backward compatibility. In order to do so, the grammar is not 100% clean,
  * but IMHO still sufficiently easy both to understand for programmers
  * maintaining the code as well as users writing the config file. Users are,
  * of course, encouraged to use new constructs only. But it needs to be noted
  * that some of the legacy constructs (specifically the in-front-of-action
  * PRI filter) are very hard to beat in ease of use, at least for simpler
  * cases. So while we hope that cfsysline support can be dropped some time in
  * the future, we will probably keep these useful constructs.
  *
  * Copyright (C) 2011 by Rainer Gerhards and Adiscon GmbH
  * Released under the GNU GPL v3. For details see LICENSE file.
  */

%option noyywrap nodefault case-insensitive
 /* noyywrap: end of input ends scanning, no yywrap() is needed.
  * nodefault: the built-in echo rule is disabled, so each start condition
  *            used below carries its own catch-all rule.
  * case-insensitive: keywords match in any letter case.
  */

%x INOBJ
	/* INOBJ is selected while we are inside an object (name/value pairs!),
	 * i.e. between the opening "(" of global/input/module/action and the
	 * closing ")".
	 */
%x COMMENT
	/* COMMENT is "the usual trick" to handle C-style comments: the text
	 * of the comment is skipped until its terminator is seen.
	 */
%x EXPR
	/* EXPR is a bit ugly, but we need it to support pre-v6 syntax. The
	 * problem is that cfsysline statements start with $..., just like
	 * variables in an expression. However, cfsysline statements can never
	 * appear inside an expression. So we create a specific expr mode, which
	 * is turned on after we have lexed a keyword that must be followed by
	 * an expression (using knowledge from the upper layer...). In expr
	 * mode, we strictly do expression-based parsing. Expr mode is stopped
	 * when we reach a token that cannot be part of an expression (currently
	 * only "then"). As I wrote, this is ugly, but it is the price to pay in
	 * order to remain compatible with the previous format.
	 */
%{
#include <ctype.h>
#include <libestr.h>
#include "utils.h"
#include "rscript.tab.h"
/* Start condition that was active when a C-style comment was opened;
 * it is restored once the comment is closed.
 */
static int preCommentState;
%}

%%

 /* expression mode: the if keyword enters EXPR and the then keyword leaves it */
 /* all EXPR tokens are only printed so far, none is returned to the parser */
"if"				{ printf("IF\n"); BEGIN EXPR; }
<EXPR>"then"			{ printf("THEN\n"); BEGIN INITIAL; }
 /* boolean operators and grouping */
<EXPR>"or"			{ printf("OR\n"); }
<EXPR>"and"			{ printf("AND\n"); }
<EXPR>"not"			{ printf("NOT\n"); }
<EXPR>"("			{ printf("LPAREN\n"); }
<EXPR>")"			{ printf("RPAREN\n"); }
 /* comparison operators; both spellings of not-equal print the same text */
<EXPR>"=="			{ printf("==\n"); }
<EXPR>"<="			{ printf("<=\n"); }
<EXPR>">="			{ printf(">=\n"); }
<EXPR>"!=" |
<EXPR>"<>"			{ printf("!=\n"); }
<EXPR>"<"			{ printf("<\n"); }
<EXPR>">"			{ printf(">\n"); }
 /* string comparison keywords; the longest match picks the _i variants */
<EXPR>"contains"		{ printf("CONTAINS\n"); }
<EXPR>"contains_i"		{ printf("CONTAINS_I\n"); }
<EXPR>"startswith"		{ printf("STARTSWITH\n"); }
<EXPR>"startswith_i"		{ printf("STARTSWITH_I\n"); }
 /* numeric literals: a leading 0 means octal, 0x means hex, else decimal */
 /* hex takes one or more digits out of 0-9 and a-f */
<EXPR>-?0[0-7]+			{ printf("NUMBER (oct) %s\n", yytext); }
<EXPR>-?0x[0-9a-f]+		{ printf("NUMBER (hex) %s\n", yytext); }
<EXPR>-?([1-9][0-9]*|0)		{ printf("NUMBER (dec) %s\n", yytext); }
 /* variable reference: dollar sign, optional second dollar or bang, then a name */
<EXPR>\$[$!]{0,1}[a-z][a-z0-9\-_\.]*	{ printf("VARNAME: '%s'\n", yytext); }
 /* single-quoted string; a backslash may only escape a single quote */
<EXPR>\'([^'\\]|\\['])*\'	 { printf("EXPR string: -%s-\n", yytext); }
 /* whitespace is dropped, any other character is reported */
<EXPR>[ \t\n]
<EXPR>.				{ printf("invalid char in expr: %s\n", yytext); }
 /* the ampersand is handed to the parser as a literal character token */
"&"				{ return '&'; }
"ruleset"			{ printf("RULESET\n"); }

 /* object openers: keyword, optional whitespace, opening parenthesis */
 /* they switch to INOBJ so that name/value pairs are scanned next */
"global"[ \n\t]*"("		{ yylval.objType = CNFOBJ_GLOBAL;
				  BEGIN INOBJ; return BEGINOBJ; }
"input"[ \n\t]*"("		{ yylval.objType = CNFOBJ_INPUT;
				  BEGIN INOBJ; return BEGINOBJ; }
"module"[ \n\t]*"("		{ yylval.objType = CNFOBJ_MODULE;
				  BEGIN INOBJ; return BEGINOBJ; }
"action"[ \n\t]*"("		{ BEGIN INOBJ; return BEGIN_ACTION; }
^[ \t]*:\$?[a-z]+[ ]*,[ ]*!?[a-z]+[ ]*,[ ]*\".*\"	{
				  printf("PROP-FILT: '%s'\n", yytext);
				}

^[ \t]*[,\*a-z]+\.[,!=;\.\*a-z]+ { printf("token prifilt '%s'\n", yytext); yylval.s = strdup(yytext); return PRIFILT; }

"*" |
\/[^*][^\n]* |
[\|\.\-:][^\n]+			{ printf("toke legacy_action '%s'\n", yytext);yylval.s = strdup(yytext); return LEGACY_ACTION; }
[a-z0-9_\-\+]+			{ printf("name: '%s'\n", yytext); }
<INOBJ>")"			{ BEGIN INITIAL; return ENDOBJ; }
<INOBJ>[a-z][a-z0-9_\.]*	{ yylval.estr = es_newStrFromCStr(yytext, yyleng);
				  return NAME; }
<INOBJ>"="			{ return(yytext[0]); }
<INOBJ>\"([^"\\]|\\['"?\\abfnrtv]|\\[0-7]{1,3})*\" {
				  yylval.estr = es_newStrFromCStr(yytext+1, yyleng-2);
				  return VALUE; }
"/*"				{ preCommentState = YY_START; BEGIN COMMENT; }
<EXPR>"/*"			{ preCommentState = YY_START; BEGIN COMMENT; }
<COMMENT>"*/"			{ BEGIN preCommentState; }
<COMMENT>([^*]|\n)+|.

<INOBJ>#.*\n	/* skip comments in input */
<INOBJ>[ \n\t]
<INOBJ>.			{ printf("INOBJ: invalid char '%s'\n", yytext); }

 /* CFSYSLINE is recognized in the INITIAL and INOBJ start conditions */
 /* it covers a dollar sign, a name and the rest of the line */
<INITIAL,INOBJ>\$[a-z]+.*$	{
				  /* the parser receives a private copy of the matched text */
				  yylval.s = strdup(yytext);
				  return CFSYSLINE;
				}

 /* INITIAL fallbacks: hash comments and whitespace are dropped, the rest is reported */
\#.*\n				/* skip comments in input */
[\n\t ]				/* drop whitespace */
.				{
				  printf("invalid char: %s\n", yytext);
				}

 /*<<EOF>>			{ printf("EOF reached\n"); }*/

%%
 /* Disabled stand-alone test driver, kept for reference:
int
main(int argc, char *argv[])
{
	es_str_t *str;
	YY_BUFFER_STATE bp;
	char ln[10240];

	readConfFile(stdin, &str);
	//printf("buffer: %s\n", es_getBufAddr(str));
	bp = yy_scan_buffer(es_getBufAddr(str), es_strlen(str));
	//yy_switch_to_buffer(bp);
	yylex();
}
*/