/* cfgtok.c - helper class to tokenize an input stream - which surprisingly
* currently does not work with streams but with string. But that will
* probably change over time ;) This class was originally written to support
* the expression module but may evolve when (if) the expression module is
* expanded (or aggregated) by a full-fledged ctoken based config parser.
* Obviously, this class is used together with config files and not any other
* parse function.
*
* Module begun 2008-02-19 by Rainer Gerhards
*
* Copyright 2008 Rainer Gerhards and Adiscon GmbH.
*
* This file is part of rsyslog.
*
* Rsyslog is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Rsyslog is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Rsyslog. If not, see <http://www.gnu.org/licenses/>.
*
* A copy of the GPL can be found in the file "COPYING" in this distribution.
*/
#include "config.h"
#include
#include
#include
#include
#include "rsyslog.h"
#include "template.h"
#include "ctok.h"
/* static data
 * DEFobjCurrIf() is invoked once for each foreign object whose interface
 * is called from this file: ctok_token (token construction/destruction)
 * and var (SetNumber/SetString). Both are requested via objUse() in the
 * class initializer.
 */
DEFobjStaticHelpers
DEFobjCurrIf(ctok_token)
DEFobjCurrIf(var)
/* Standard-Constructor
 * The body between the BEGIN and END macros is empty, i.e. no
 * ctok-specific member initialization is performed here.
 */
BEGINobjConstruct(ctok) /* be sure to specify the object type also in END macro! */
ENDobjConstruct(ctok)
/* ConstructionFinalizer
 * The ctok object needs no work at finalization time, so pThis is unused
 * and the function body consists of the return-value macros only.
 * rgerhards, 2008-01-09
 */
rsRetVal ctokConstructFinalize(ctok_t __attribute__((unused)) *pThis)
{
DEFiRet;
RETiRet;
}
/* destructor for the ctok object
 * No ctok-specific resources are released in the body below.
 * NOTE(review): a token that is still parked in pUngotToken (see
 * ctokUngetToken()) is not destructed here - confirm whether that can
 * leak or whether callers always fetch it back first.
 */
BEGINobjDestruct(ctok) /* be sure to specify the object type also in END and CODESTART macros! */
CODESTARTobjDestruct(ctok)
/* ... then free resources */
ENDobjDestruct(ctok)
/* Push the most recently read character back onto the input "stream".
 * Only a single character may be pushed back, and only after at least one
 * character has been read. As the "stream" is currently an in-memory
 * string, this is done by stepping the read pointer back by one; the
 * character itself (c) is therefore not needed. Should real streams be
 * used one day, the strm object is expected to provide this functionality.
 * -- rgerhards, 2008-02-19
 */
static rsRetVal
ctokUngetCharFromStream(ctok_t *pThis, uchar __attribute__((unused)) c)
{
	DEFiRet;

	ISOBJ_TYPE_assert(pThis, ctok);

	/* step back to the character that was handed out last */
	pThis->pp -= 1;

	RETiRet;
}
/* Fetch the next character from the input "stream" (currently an
 * in-memory string) into *pc and advance the read position.
 * The end of the string as well as a '#' (begin of comment) end the
 * stream: in that case RS_RET_EOS is returned, *pc is left untouched and
 * the read position does not move.
 * -- rgerhards, 2008-02-19
 */
static rsRetVal
ctokGetCharFromStream(ctok_t *pThis, uchar *pc)
{
	DEFiRet;

	ISOBJ_TYPE_assert(pThis, ctok);
	ASSERT(pc != NULL);

	if(*pThis->pp != '\0' && *pThis->pp != '#') {
		/* regular character: hand it out and move on */
		*pc = pThis->pp[0];
		pThis->pp++;
	} else {
		ABORT_FINALIZE(RS_RET_EOS);
	}

finalize_it:
	RETiRet;
}
/* Advance the input "stream" past any leading whitespace so that the next
 * read delivers the first non-whitespace character. If the stream ends
 * while skipping, the error from the character reader is returned.
 * rgerhards, 2008-02-19
 */
static rsRetVal
ctokSkipWhitespaceFromStream(ctok_t *pThis)
{
	DEFiRet;
	uchar ch;

	ISOBJ_TYPE_assert(pThis, ctok);

	/* read until something that is not whitespace shows up */
	do {
		CHKiRet(ctokGetCharFromStream(pThis, &ch));
	} while(isspace(ch));

	/* that character belongs to the next token - give it back */
	CHKiRet(ctokUngetCharFromStream(pThis, ch));
	dbgprintf("skipped whitepsace, stream now '%s'\n", pThis->pp);

finalize_it:
	RETiRet;
}
/* Read the next word from the input "stream" (currently an in-memory
 * string) into pWordBuf. Leading whitespace is skipped first. A word
 * consists of alphanumeric characters, '_' and '-'; the first other
 * character ends it and is pushed back onto the stream. At most
 * lenWordBuf-1 characters are stored, the result is always
 * NUL-terminated on success. A buffer of 128 bytes or more should be by
 * far sufficient. -- rgerhards, 2008-02-19
 */
static rsRetVal
ctokGetWordFromStream(ctok_t *pThis, uchar *pWordBuf, size_t lenWordBuf)
{
	DEFiRet;
	uchar ch;
	size_t iUsed = 0; /* number of characters stored so far */

	ISOBJ_TYPE_assert(pThis, ctok);
	ASSERT(pWordBuf != NULL);
	ASSERT(lenWordBuf > 0);

	CHKiRet(ctokSkipWhitespaceFromStream(pThis));

	CHKiRet(ctokGetCharFromStream(pThis, &ch));
	/* one byte is always kept in reserve for the terminating NUL */
	while((isalnum(ch) || ch == '_' || ch == '-') && iUsed + 1 < lenWordBuf) {
		pWordBuf[iUsed] = ch;
		++iUsed;
		CHKiRet(ctokGetCharFromStream(pThis, &ch));
	}
	pWordBuf[iUsed] = '\0';

	/* the character that ended the word was read too much, return it */
	CHKiRet(ctokUngetCharFromStream(pThis, ch));
	dbgprintf("end ctokGetWordFromStream, stream now '%s'\n", pThis->pp);

finalize_it:
	RETiRet;
}
/* Parse a numeric constant (the "number" ABNF element) and store it in
 * the token's var object; the token type is set to ctok_NUMBER.
 * A leading "0x" selects base 16, any other leading '0' selects base 8,
 * everything else is base 10. Characters read while scanning digits are
 * lower-cased, so hex digits may be given in either case. A digit that is
 * not valid for the selected base yields RS_RET_INVALID_OCTAL_DIGIT for
 * base 8 and RS_RET_INVALID_HEX_DIGIT for the other bases.
 * rgerhards, 2008-02-19
 */
static rsRetVal
ctokGetNumber(ctok_t *pThis, ctok_token_t *pToken)
{
	DEFiRet;
	number_t n = 0; /* the number parsed so far */
	uchar c;
	int valC;
	int iBase = 10; /* decimal unless a prefix tells otherwise */

	ISOBJ_TYPE_assert(pThis, ctok);
	ASSERT(pToken != NULL);

	pToken->tok = ctok_NUMBER;

	CHKiRet(ctokGetCharFromStream(pThis, &c));
	if(c == '0') {
		/* leading zero: octal, or hex if an 'x' follows */
		CHKiRet(ctokGetCharFromStream(pThis, &c));
		if(c != 'x') {
			iBase = 8;
		} else {
			CHKiRet(ctokGetCharFromStream(pThis, &c));
			c = tolower(c);
			iBase = 16;
		}
	}

	/* consume digits; the first character that is neither a decimal digit
	 * nor one of 'a'..'f' terminates the number.
	 */
	while(isdigit(c) || (c >= 'a' && c <= 'f')) {
		valC = isdigit(c) ? c - '0' : c - 'a' + 10;
		if(valC >= iBase) {
			if(iBase != 8) {
				ABORT_FINALIZE(RS_RET_INVALID_HEX_DIGIT);
			} else {
				ABORT_FINALIZE(RS_RET_INVALID_OCTAL_DIGIT);
			}
		}
		/* digit is valid for this base, so fold it in */
		n = n * iBase + valC;
		CHKiRet(ctokGetCharFromStream(pThis, &c));
		c = tolower(c);
	}

	/* the terminating character is not ours - put it back */
	CHKiRet(ctokUngetCharFromStream(pThis, c));

	CHKiRet(var.SetNumber(pToken->pVar, n));
	dbgprintf("number, number is: '%lld'\n", n);

finalize_it:
	RETiRet;
}
/* Read in a variable name. This covers both msgvar and sysvar from the
 * ABNF. The caller has already consumed the introducing '$'; a second
 * '$' directly following it marks a system variable (ctok_SYSVAR),
 * otherwise the token is a message variable (ctok_MSGVAR).
 * The name runs up to the next whitespace character (which is consumed)
 * and is stored lower-cased in the token's var object. If the stream ends
 * before a whitespace is seen, the error of the character reader (e.g.
 * RS_RET_EOS) is returned and the partially built string is freed.
 * rgerhards, 2008-02-19
 */
static rsRetVal
ctokGetVar(ctok_t *pThis, ctok_token_t *pToken)
{
	DEFiRet;
	uchar c;
	cstr_t *pstrVal = NULL; /* must be NULL so that cleanup is safe on early errors */

	ISOBJ_TYPE_assert(pThis, ctok);
	ASSERT(pToken != NULL);

	CHKiRet(ctokGetCharFromStream(pThis, &c));

	if(c == '$') { /* second dollar, we have a system variable */
		pToken->tok = ctok_SYSVAR;
		CHKiRet(ctokGetCharFromStream(pThis, &c)); /* "eat" it... */
	} else {
		pToken->tok = ctok_MSGVAR;
	}

	CHKiRet(rsCStrConstruct(&pstrVal));
	/* this loop is quite simple, a variable name is terminated by whitespace. */
	while(!isspace(c)) {
		CHKiRet(rsCStrAppendChar(pstrVal, tolower(c)));
		CHKiRet(ctokGetCharFromStream(pThis, &c));
	}
	CHKiRet(rsCStrFinish(pstrVal));

	dbgprintf("var, var is: '%s'\n", rsCStrGetSzStr(pstrVal));

	CHKiRet(var.SetString(pToken->pVar, pstrVal));
	pstrVal = NULL; /* handed over to the var object, no longer ours */

finalize_it:
	if(iRet != RS_RET_OK) {
		if(pstrVal != NULL) {
			rsCStrDestruct(&pstrVal);
		}
	}

	RETiRet;
}
/* Read in a simple string (simpstr in ABNF). The caller has already
 * consumed the opening single quote. Characters are collected up to the
 * closing, unescaped single quote (which is consumed). A backslash
 * escapes the following character: the backslash itself is dropped and
 * the next character is taken literally, so it never terminates the
 * string. The result is stored in the token's var object and the token
 * type is set to ctok_SIMPSTR. If the stream ends before the closing
 * quote, the error of the character reader is returned and the partially
 * built string is freed.
 * rgerhards, 2008-02-19
 */
static rsRetVal
ctokGetSimpStr(ctok_t *pThis, ctok_token_t *pToken)
{
	DEFiRet;
	uchar c;
	int bInEsc = 0;
	cstr_t *pstrVal = NULL; /* must be NULL so that cleanup is safe if construction fails */

	ISOBJ_TYPE_assert(pThis, ctok);
	ASSERT(pToken != NULL);

	pToken->tok = ctok_SIMPSTR;

	CHKiRet(rsCStrConstruct(&pstrVal));
	CHKiRet(ctokGetCharFromStream(pThis, &c));
	/* while we are in escape mode (had a backslash), no sequence
	 * terminates the loop. If outside, it is terminated by a single quote.
	 */
	while(bInEsc || c != '\'') {
		if(bInEsc) {
			CHKiRet(rsCStrAppendChar(pstrVal, c));
			bInEsc = 0;
		} else {
			if(c == '\\') {
				bInEsc = 1;
			} else {
				CHKiRet(rsCStrAppendChar(pstrVal, c));
			}
		}
		CHKiRet(ctokGetCharFromStream(pThis, &c));
	}
	CHKiRet(rsCStrFinish(pstrVal));

	dbgprintf("simpstr, str is: '%s'\n", rsCStrGetSzStr(pstrVal));

	CHKiRet(var.SetString(pToken->pVar, pstrVal));
	pstrVal = NULL; /* handed over to the var object, no longer ours */

finalize_it:
	if(iRet != RS_RET_OK) {
		if(pstrVal != NULL) {
			rsCStrDestruct(&pstrVal);
		}
	}

	RETiRet;
}
/* Unget a token. The token ungotten will be returned the next time
 * ctokGetToken() is called. Only one token can be ungotten at a time.
 * If a second token is ungotten, the first is lost. This is considered
 * a programming error and is guarded by an assertion only.
 * The token pointer is stored as-is in pThis->pUngotToken; no copy is
 * made.
 * rgerhards, 2008-02-20
 */
static rsRetVal
ctokUngetToken(ctok_t *pThis, ctok_token_t *pToken)
{
DEFiRet;
ISOBJ_TYPE_assert(pThis, ctok);
ASSERT(pToken != NULL);
/* there is only a single slot - it must be free before we store into it */
ASSERT(pThis->pUngotToken == NULL);
pThis->pUngotToken = pToken;
RETiRet;
}
/* Skip an inline comment (just like a C comment). The caller has already
 * consumed the opening slash-asterisk pair; this function reads up to and
 * including the closing asterisk-slash pair. If the stream ends first,
 * the error of the character reader is returned.
 * rgerhards, 2008-02-20
 */
static rsRetVal
ctokSkipInlineComment(ctok_t *pThis)
{
	DEFiRet;
	uchar ch;
	int bPrevWasAsterisk = 0; /* was the previously read character a '*'? */

	ISOBJ_TYPE_assert(pThis, ctok);

	CHKiRet(ctokGetCharFromStream(pThis, &ch)); /* read a character */
	/* keep going until a '/' directly follows a '*' */
	while(!bPrevWasAsterisk || ch != '/') {
		bPrevWasAsterisk = (ch == '*');
		CHKiRet(ctokGetCharFromStream(pThis, &ch)); /* read next */
	}

finalize_it:
	RETiRet;
}
/* Get the *next* token from the input stream. This parses the next token and
 * ignores any whitespace and inline comments in between. End of stream is
 * communicated via iRet.
 * If a token has been ungotten via ctokUngetToken(), that token is returned
 * without any parsing. Otherwise a new token object is constructed; on any
 * error it is destructed again and *ppToken is not set.
 * The returned token must either be destructed by the caller OR being passed
 * back to ctokUngetToken().
 * Notes on individual tokens:
 *  - "==" and "!=" require the second character; anything else after '='
 *    or '!' yields ctok_INVALID.
 *  - after a plain '<' or '>' (and a plain '/'), the lookahead character is
 *    pushed back so that it is available for the next token.
 *  - a double-quoted string is not yet supported (RS_RET_NOT_IMPLEMENTED).
 *  - a word that is no known keyword is reported as ctok_FUNCTION when it is
 *    directly followed by '(' (which is pushed back), else as ctok_INVALID.
 * rgerhards, 2008-02-19
 */
static rsRetVal
ctokGetToken(ctok_t *pThis, ctok_token_t **ppToken)
{
	DEFiRet;
	ctok_token_t *pToken = NULL; /* must be NULL so that cleanup is safe if construction fails */
	uchar c;
	uchar szWord[128];
	int bRetry = 0; /* retry parse? Only needed for inline comments... */

	ISOBJ_TYPE_assert(pThis, ctok);
	ASSERT(ppToken != NULL);

	/* first check if we have an ungotten token and, if so, provide that
	 * one back (without any parsing). -- rgerhards, 2008-02-20
	 */
	if(pThis->pUngotToken != NULL) {
		*ppToken = pThis->pUngotToken;
		pThis->pUngotToken = NULL;
		FINALIZE;
	}

	/* setup the stage - create our token */
	CHKiRet(ctok_token.Construct(&pToken));
	CHKiRet(ctok_token.ConstructFinalize(pToken));

	/* find the next token. We may loop when we have inline comments */
	do {
		bRetry = 0;
		CHKiRet(ctokSkipWhitespaceFromStream(pThis));
		CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a character */
		switch(c) {
			case '=': /* == */
				CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a character */
				pToken->tok = (c == '=')? ctok_CMP_EQ : ctok_INVALID;
				break;
			case '!': /* != */
				CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a character */
				pToken->tok = (c == '=')? ctok_CMP_NEQ : ctok_INVALID;
				break;
			case '<': /* <, <=, <> */
				CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a character */
				if(c == '=') {
					pToken->tok = ctok_CMP_LTEQ;
				} else if(c == '>') {
					pToken->tok = ctok_CMP_NEQ;
				} else {
					/* lookahead is not part of this operator, so it must
					 * not be lost (think of "a <5").
					 */
					CHKiRet(ctokUngetCharFromStream(pThis, c));
					pToken->tok = ctok_CMP_LT;
				}
				break;
			case '>': /* >, >= */
				CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a character */
				if(c == '=') {
					pToken->tok = ctok_CMP_GTEQ;
				} else {
					/* lookahead belongs to the next token, put it back */
					CHKiRet(ctokUngetCharFromStream(pThis, c));
					pToken->tok = ctok_CMP_GT;
				}
				break;
			case '+':
				pToken->tok = ctok_PLUS;
				break;
			case '-':
				pToken->tok = ctok_MINUS;
				break;
			case '*':
				pToken->tok = ctok_TIMES;
				break;
			case '/': /* /, /.* ... *./ (comments, mungled here for obvious reasons...) */
				CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a character */
				if(c == '*') {
					/* we have a comment and need to skip it; an
					 * unterminated comment is reported to the caller.
					 */
					CHKiRet(ctokSkipInlineComment(pThis));
					bRetry = 1;
				} else {
					CHKiRet(ctokUngetCharFromStream(pThis, c)); /* put back, not processed */
				}
				/* in the comment case this is overwritten during the retry */
				pToken->tok = ctok_DIV;
				break;
			case '%':
				pToken->tok = ctok_MOD;
				break;
			case '(':
				pToken->tok = ctok_LPAREN;
				break;
			case ')':
				pToken->tok = ctok_RPAREN;
				break;
			case ',':
				pToken->tok = ctok_COMMA;
				break;
			case '&':
				pToken->tok = ctok_STRADD;
				break;
			case '$':
				CHKiRet(ctokGetVar(pThis, pToken));
				break;
			case '\'': /* simple string, this is somewhat more elaborate */
				CHKiRet(ctokGetSimpStr(pThis, pToken));
				break;
			case '"':
				/* TODO: template string parser */
				ABORT_FINALIZE(RS_RET_NOT_IMPLEMENTED);
				break;
			default:
				CHKiRet(ctokUngetCharFromStream(pThis, c)); /* push back, we need it in any case */
				if(isdigit(c)) {
					CHKiRet(ctokGetNumber(pThis, pToken));
				} else { /* now we check if we have a multi-char sequence */
					CHKiRet(ctokGetWordFromStream(pThis, szWord, sizeof(szWord)/sizeof(uchar)));
					if(!strcasecmp((char*)szWord, "and")) {
						pToken->tok = ctok_AND;
					} else if(!strcasecmp((char*)szWord, "or")) {
						pToken->tok = ctok_OR;
					} else if(!strcasecmp((char*)szWord, "not")) {
						pToken->tok = ctok_NOT;
					} else if(!strcasecmp((char*)szWord, "contains")) {
						pToken->tok = ctok_CMP_CONTAINS;
					} else if(!strcasecmp((char*)szWord, "contains_i")) {
						pToken->tok = ctok_CMP_CONTAINSI;
					} else if(!strcasecmp((char*)szWord, "startswith")) {
						pToken->tok = ctok_CMP_STARTSWITH;
					} else if(!strcasecmp((char*)szWord, "startswith_i")) {
						pToken->tok = ctok_CMP_STARTSWITHI;
					} else if(!strcasecmp((char*)szWord, "then")) {
						pToken->tok = ctok_THEN;
					} else {
						/* finally, we check if it is a function */
						CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a character */
						if(c == '(') {
							/* push c back, higher level parser needs it */
							CHKiRet(ctokUngetCharFromStream(pThis, c));
							pToken->tok = ctok_FUNCTION;
							// TODO: fill function name
						} else { /* give up... */
							pToken->tok = ctok_INVALID;
						}
					}
				}
				break;
		}
	} while(bRetry); /* warning: do ... while()! */

	*ppToken = pToken;
	dbgoprint((obj_t*) pToken, "token: %d\n", pToken->tok);

finalize_it:
	if(iRet != RS_RET_OK) {
		if(pToken != NULL)
			ctok_token.Destruct(&pToken);
	}

	RETiRet;
}
/* property set methods */
/* simple ones first */
/* NOTE(review): this macro presumably generates ctokSetpp(), the setter
 * for the pp member that is exported as Setpp in the queryInterface
 * function - confirm against the DEFpropSetMeth definition.
 */
DEFpropSetMeth(ctok, pp, uchar*)
/* Return the current position of pp (the read pointer into the input
 * string) via *pp. This is most important as currently we do only
 * partial parsing, so the rest must know where to start from...
 * rgerhards, 2008-02-19
 */
static rsRetVal
ctokGetpp(ctok_t *pThis, uchar **pp)
{
	DEFiRet;

	/* pThis is dereferenced below, so validate it like all other methods do */
	ISOBJ_TYPE_assert(pThis, ctok);
	ASSERT(pp != NULL);

	*pp = pThis->pp;

	RETiRet;
}
/* queryInterface function
 * Fills the interface structure pIf with the entry points of this class.
 * Only the current interface version (ctokCURR_IF_VERSION) is served; any
 * other requested version is rejected with
 * RS_RET_INTERFACE_NOT_SUPPORTED and pIf is left unmodified.
 * rgerhards, 2008-02-21
 */
BEGINobjQueryInterface(ctok)
CODESTARTobjQueryInterface(ctok)
if(pIf->ifVersion != ctokCURR_IF_VERSION) { /* check for current version, increment on each change */
ABORT_FINALIZE(RS_RET_INTERFACE_NOT_SUPPORTED);
}
/* ok, we have the right interface, so let's fill it
 * Please note that we may also do some backwards-compatibility
 * work here (if we can support an older interface version - that,
 * of course, also affects the "if" above).
 */
//xxxpIf->oID = OBJctok;
/* object life cycle */
pIf->Construct = ctokConstruct;
pIf->ConstructFinalize = ctokConstructFinalize;
pIf->Destruct = ctokDestruct;
/* tokenizer functionality and access to the read position */
pIf->Getpp = ctokGetpp;
pIf->GetToken = ctokGetToken;
pIf->UngetToken = ctokUngetToken;
pIf->Setpp = ctokSetpp;
finalize_it:
ENDobjQueryInterface(ctok)
/* Class initializer for ctok (class version 1). It requests the
 * interfaces of the objects used in this file (ctok_token and var) and
 * installs ctokConstructFinalize as the handler for the
 * construction-finalizer object method.
 */
BEGINObjClassInit(ctok, 1, OBJ_IS_CORE_MODULE) /* class, version */
/* request objects we use */
CHKiRet(objUse(ctok_token, CORE_COMPONENT));
CHKiRet(objUse(var, CORE_COMPONENT));
OBJSetMethodHandler(objMethod_CONSTRUCTION_FINALIZER, ctokConstructFinalize);
ENDObjClassInit(ctok)
/* vi:set ai:
*/