diff options
Diffstat (limited to 'runtime/ctok.c')
-rw-r--r-- | runtime/ctok.c | 593 |
1 files changed, 593 insertions, 0 deletions
diff --git a/runtime/ctok.c b/runtime/ctok.c new file mode 100644 index 00000000..de2bd8a8 --- /dev/null +++ b/runtime/ctok.c @@ -0,0 +1,593 @@ +/* cfgtok.c - helper class to tokenize an input stream - which surprisingly + * currently does not work with streams but with string. But that will + * probably change over time ;) This class was originally written to support + * the expression module but may evolve when (if) the expression module is + * expanded (or aggregated) by a full-fledged ctoken based config parser. + * Obviously, this class is used together with config files and not any other + * parse function. + * + * Module begun 2008-02-19 by Rainer Gerhards + * + * Copyright (C) 2008 by Rainer Gerhards and Adiscon GmbH. + * + * This file is part of the rsyslog runtime library. + * + * The rsyslog runtime library is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * The rsyslog runtime library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with the rsyslog runtime library. If not, see <http://www.gnu.org/licenses/>. + * + * A copy of the GPL can be found in the file "COPYING" in this distribution. + * A copy of the LGPL can be found in the file "COPYING.LESSER" in this distribution. + */ + +#include "config.h" +#include <stdlib.h> +#include <ctype.h> +#include <strings.h> +#include <assert.h> + +#include "rsyslog.h" +#include "template.h" +#include "ctok.h" + +/* static data */ +DEFobjStaticHelpers +DEFobjCurrIf(ctok_token) +DEFobjCurrIf(var) + + +/* Standard-Constructor + */ +BEGINobjConstruct(ctok) /* be sure to specify the object type also in END macro! */ +ENDobjConstruct(ctok) + + +/* ConstructionFinalizer + * rgerhards, 2008-01-09 + */ +rsRetVal ctokConstructFinalize(ctok_t __attribute__((unused)) *pThis) +{ + DEFiRet; + RETiRet; +} + + +/* destructor for the ctok object */ +BEGINobjDestruct(ctok) /* be sure to specify the object type also in END and CODESTART macros! */ +CODESTARTobjDestruct(ctok) + /* ... then free resources */ +ENDobjDestruct(ctok) + + +/* unget character from input stream. At most one character can be ungotten. + * This funtion is only permitted to be called after at least one character + * has been read from the stream. Right now, we handle the situation simply by + * moving the string "stream" pointer one position backwards. If we work with + * real streams (some time), the strm object will handle the functionality + * itself. -- rgerhards, 2008-02-19 + */ +static rsRetVal +ctokUngetCharFromStream(ctok_t *pThis, uchar __attribute__((unused)) c) +{ + DEFiRet; + + ISOBJ_TYPE_assert(pThis, ctok); + --pThis->pp; + + RETiRet; +} + + +/* get the next character from the input "stream" (currently just a in-memory + * string...) -- rgerhards, 2008-02-19 + */ +static rsRetVal +ctokGetCharFromStream(ctok_t *pThis, uchar *pc) +{ + DEFiRet; + + ISOBJ_TYPE_assert(pThis, ctok); + ASSERT(pc != NULL); + + /* end of string or begin of comment terminates the "stream" */ + if(*pThis->pp == '\0' || *pThis->pp == '#') { + ABORT_FINALIZE(RS_RET_EOS); + } else { + *pc = *pThis->pp; + ++pThis->pp; + } + +finalize_it: + RETiRet; +} + + +/* skip whitespace in the input "stream". + * rgerhards, 2008-02-19 + */ +static rsRetVal +ctokSkipWhitespaceFromStream(ctok_t *pThis) +{ + DEFiRet; + uchar c; + + ISOBJ_TYPE_assert(pThis, ctok); + + CHKiRet(ctokGetCharFromStream(pThis, &c)); + while(isspace(c)) { + CHKiRet(ctokGetCharFromStream(pThis, &c)); + } + + /* we must unget the one non-whitespace we found */ + CHKiRet(ctokUngetCharFromStream(pThis, c)); + +dbgprintf("skipped whitepsace, stream now '%s'\n", pThis->pp); +finalize_it: + RETiRet; +} + + +/* get the next word from the input "stream" (currently just a in-memory + * string...). A word is anything from the current location until the + * first non-alphanumeric character. If the word is longer + * than the provided memory buffer, parsing terminates when buffer length + * has been reached. A buffer of 128 bytes or more should always be by + * far sufficient. -- rgerhards, 2008-02-19 + */ +static rsRetVal +ctokGetWordFromStream(ctok_t *pThis, uchar *pWordBuf, size_t lenWordBuf) +{ + DEFiRet; + uchar c; + + ISOBJ_TYPE_assert(pThis, ctok); + ASSERT(pWordBuf != NULL); + ASSERT(lenWordBuf > 0); + + CHKiRet(ctokSkipWhitespaceFromStream(pThis)); + + CHKiRet(ctokGetCharFromStream(pThis, &c)); + while((isalnum(c) || c == '_' || c == '-') && lenWordBuf > 1) { + *pWordBuf++ = c; + --lenWordBuf; + CHKiRet(ctokGetCharFromStream(pThis, &c)); + } + *pWordBuf = '\0'; /* there is always space for this - see while() */ + + /* push back the char that we have read too much */ + CHKiRet(ctokUngetCharFromStream(pThis, c)); + +finalize_it: + RETiRet; +} + + +/* read in a constant number + * This is the "number" ABNF element + * rgerhards, 2008-02-19 + */ +static rsRetVal +ctokGetNumber(ctok_t *pThis, ctok_token_t *pToken) +{ + DEFiRet; + number_t n; /* the parsed number */ + uchar c; + int valC; + int iBase; + + ISOBJ_TYPE_assert(pThis, ctok); + ASSERT(pToken != NULL); + + pToken->tok = ctok_NUMBER; + + CHKiRet(ctokGetCharFromStream(pThis, &c)); + if(c == '0') { /* octal? */ + CHKiRet(ctokGetCharFromStream(pThis, &c)); + if(c == 'x') { /* nope, hex! */ + CHKiRet(ctokGetCharFromStream(pThis, &c)); + c = tolower(c); + iBase = 16; + } else { + iBase = 8; + } + } else { + iBase = 10; + } + + n = 0; + /* this loop is quite simple, a variable name is terminated by whitespace. */ + while(isdigit(c) || (c >= 'a' && c <= 'f')) { + if(isdigit(c)) { + valC = c - '0'; + } else { + valC = c - 'a' + 10; + } + + if(valC >= iBase) { + if(iBase == 8) { + ABORT_FINALIZE(RS_RET_INVALID_OCTAL_DIGIT); + } else { + ABORT_FINALIZE(RS_RET_INVALID_HEX_DIGIT); + } + } + /* we now have the next value and know it is right */ + n = n * iBase + valC; + CHKiRet(ctokGetCharFromStream(pThis, &c)); + c = tolower(c); + } + + /* we need to unget the character that made the loop terminate */ + CHKiRet(ctokUngetCharFromStream(pThis, c)); + + CHKiRet(var.SetNumber(pToken->pVar, n)); + +finalize_it: + RETiRet; +} + + +/* read in a variable + * This covers both msgvar and sysvar from the ABNF. + * rgerhards, 2008-02-19 + */ +static rsRetVal +ctokGetVar(ctok_t *pThis, ctok_token_t *pToken) +{ + DEFiRet; + uchar c; + cstr_t *pstrVal; + + ISOBJ_TYPE_assert(pThis, ctok); + ASSERT(pToken != NULL); + + CHKiRet(ctokGetCharFromStream(pThis, &c)); + + if(c == '$') { /* second dollar, we have a system variable */ + pToken->tok = ctok_SYSVAR; + CHKiRet(ctokGetCharFromStream(pThis, &c)); /* "eat" it... */ + } else { + pToken->tok = ctok_MSGVAR; + } + + CHKiRet(rsCStrConstruct(&pstrVal)); + /* this loop is quite simple, a variable name is terminated by whitespace. */ + while(!isspace(c)) { + CHKiRet(rsCStrAppendChar(pstrVal, tolower(c))); + CHKiRet(ctokGetCharFromStream(pThis, &c)); + } + CHKiRet(rsCStrFinish(pStrB)); + + CHKiRet(var.SetString(pToken->pVar, pstrVal)); + pstrVal = NULL; + +finalize_it: + if(iRet != RS_RET_OK) { + if(pstrVal != NULL) { + rsCStrDestruct(&pstrVal); + } + } + + RETiRet; +} + + +/* read in a simple string (simpstr in ABNF) + * rgerhards, 2008-02-19 + */ +static rsRetVal +ctokGetSimpStr(ctok_t *pThis, ctok_token_t *pToken) +{ + DEFiRet; + uchar c; + int bInEsc = 0; + cstr_t *pstrVal; + + ISOBJ_TYPE_assert(pThis, ctok); + ASSERT(pToken != NULL); + + pToken->tok = ctok_SIMPSTR; + + CHKiRet(rsCStrConstruct(&pstrVal)); + CHKiRet(ctokGetCharFromStream(pThis, &c)); + /* while we are in escape mode (had a backslash), no sequence + * terminates the loop. If outside, it is terminated by a single quote. + */ + while(bInEsc || c != '\'') { + if(bInEsc) { + CHKiRet(rsCStrAppendChar(pstrVal, c)); + bInEsc = 0; + } else { + if(c == '\\') { + bInEsc = 1; + } else { + CHKiRet(rsCStrAppendChar(pstrVal, c)); + } + } + CHKiRet(ctokGetCharFromStream(pThis, &c)); + } + CHKiRet(rsCStrFinish(pStrB)); + + CHKiRet(var.SetString(pToken->pVar, pstrVal)); + pstrVal = NULL; + +finalize_it: + if(iRet != RS_RET_OK) { + if(pstrVal != NULL) { + rsCStrDestruct(&pstrVal); + } + } + + RETiRet; +} + + +/* Unget a token. The token ungotten will be returned the next time + * ctokGetToken() is called. Only one token can be ungotten at a time. + * If a second token is ungotten, the first is lost. This is considered + * a programming error. + * rgerhards, 2008-02-20 + */ +static rsRetVal +ctokUngetToken(ctok_t *pThis, ctok_token_t *pToken) +{ + DEFiRet; + + ISOBJ_TYPE_assert(pThis, ctok); + ASSERT(pToken != NULL); + ASSERT(pThis->pUngotToken == NULL); + + pThis->pUngotToken = pToken; + + RETiRet; +} + + +/* skip an inine comment (just like a C-comment) + * rgerhards, 2008-02-20 + */ +static rsRetVal +ctokSkipInlineComment(ctok_t *pThis) +{ + DEFiRet; + uchar c; + int bHadAsterisk = 0; + + ISOBJ_TYPE_assert(pThis, ctok); + + CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a charater */ + while(!(bHadAsterisk && c == '/')) { + bHadAsterisk = (c == '*') ? 1 : 0; + CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read next */ + } + +finalize_it: + RETiRet; +} + + + +/* Get the *next* token from the input stream. This parses the next token and + * ignores any whitespace in between. End of stream is communicated via iRet. + * The returned token must either be destructed by the caller OR being passed + * back to ctokUngetToken(). + * rgerhards, 2008-02-19 + */ +static rsRetVal +ctokGetToken(ctok_t *pThis, ctok_token_t **ppToken) +{ + DEFiRet; + ctok_token_t *pToken; + uchar c; + uchar szWord[128]; + int bRetry = 0; /* retry parse? Only needed for inline comments... */ + + ISOBJ_TYPE_assert(pThis, ctok); + ASSERT(ppToken != NULL); + + /* first check if we have an ungotten token and, if so, provide that + * one back (without any parsing). -- rgerhards, 2008-02-20 + */ + if(pThis->pUngotToken != NULL) { + *ppToken = pThis->pUngotToken; + pThis->pUngotToken = NULL; + FINALIZE; + } + + /* setup the stage - create our token */ + CHKiRet(ctok_token.Construct(&pToken)); + CHKiRet(ctok_token.ConstructFinalize(pToken)); + + /* find the next token. We may loop when we have inline comments */ + do { + bRetry = 0; + CHKiRet(ctokSkipWhitespaceFromStream(pThis)); + CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a charater */ + switch(c) { + case '=': /* == */ + CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a charater */ + pToken->tok = (c == '=')? ctok_CMP_EQ : ctok_INVALID; + break; + case '!': /* != */ + CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a charater */ + pToken->tok = (c == '=')? ctok_CMP_NEQ : ctok_INVALID; + break; + case '<': /* <, <=, <> */ + CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a charater */ + if(c == '=') { + pToken->tok = ctok_CMP_LTEQ; + } else if(c == '>') { + pToken->tok = ctok_CMP_NEQ; + } else { + pToken->tok = ctok_CMP_LT; + } + break; + case '>': /* >, >= */ + CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a charater */ + if(c == '=') { + pToken->tok = ctok_CMP_GTEQ; + } else { + pToken->tok = ctok_CMP_GT; + } + break; + case '+': + pToken->tok = ctok_PLUS; + break; + case '-': + pToken->tok = ctok_MINUS; + break; + case '*': + pToken->tok = ctok_TIMES; + break; + case '/': /* /, /.* ... *./ (comments, mungled here for obvious reasons...) */ + CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a charater */ + if(c == '*') { + /* we have a comment and need to skip it */ + ctokSkipInlineComment(pThis); + bRetry = 1; + } else { + CHKiRet(ctokUngetCharFromStream(pThis, c)); /* put back, not processed */ + } + pToken->tok = ctok_DIV; + break; + case '%': + pToken->tok = ctok_MOD; + break; + case '(': + pToken->tok = ctok_LPAREN; + break; + case ')': + pToken->tok = ctok_RPAREN; + break; + case ',': + pToken->tok = ctok_COMMA; + break; + case '&': + pToken->tok = ctok_STRADD; + break; + case '$': + CHKiRet(ctokGetVar(pThis, pToken)); + break; + case '\'': /* simple string, this is somewhat more elaborate */ + CHKiRet(ctokGetSimpStr(pThis, pToken)); + break; + case '"': + /* TODO: template string parser */ + ABORT_FINALIZE(RS_RET_NOT_IMPLEMENTED); + break; + default: + CHKiRet(ctokUngetCharFromStream(pThis, c)); /* push back, we need it in any case */ + if(isdigit(c)) { + CHKiRet(ctokGetNumber(pThis, pToken)); + } else { /* now we check if we have a multi-char sequence */ + CHKiRet(ctokGetWordFromStream(pThis, szWord, sizeof(szWord)/sizeof(uchar))); + if(!strcasecmp((char*)szWord, "and")) { + pToken->tok = ctok_AND; + } else if(!strcasecmp((char*)szWord, "or")) { + pToken->tok = ctok_OR; + } else if(!strcasecmp((char*)szWord, "not")) { + pToken->tok = ctok_NOT; + } else if(!strcasecmp((char*)szWord, "contains")) { + pToken->tok = ctok_CMP_CONTAINS; + } else if(!strcasecmp((char*)szWord, "contains_i")) { + pToken->tok = ctok_CMP_CONTAINSI; + } else if(!strcasecmp((char*)szWord, "startswith")) { + pToken->tok = ctok_CMP_STARTSWITH; + } else if(!strcasecmp((char*)szWord, "startswith_i")) { + pToken->tok = ctok_CMP_STARTSWITHI; + } else if(!strcasecmp((char*)szWord, "then")) { + pToken->tok = ctok_THEN; + } else { + /* finally, we check if it is a function */ + CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a charater */ + if(c == '(') { + /* push c back, higher level parser needs it */ + CHKiRet(ctokUngetCharFromStream(pThis, c)); + pToken->tok = ctok_FUNCTION; + // TODO: fill function name + } else { /* give up... */ + pToken->tok = ctok_INVALID; + } + } + } + break; + } + } while(bRetry); /* warning: do ... while()! */ + + *ppToken = pToken; + dbgoprint((obj_t*) pToken, "token: %d\n", pToken->tok); + +finalize_it: + if(iRet != RS_RET_OK) { + if(pToken != NULL) + ctok_token.Destruct(&pToken); + } + + RETiRet; +} + + +/* property set methods */ +/* simple ones first */ +DEFpropSetMeth(ctok, pp, uchar*) + +/* return the current position of pp - most important as currently we do only + * partial parsing, so the rest must know where to start from... + * rgerhards, 2008-02-19 + */ +static rsRetVal +ctokGetpp(ctok_t *pThis, uchar **pp) +{ + DEFiRet; + ASSERT(pp != NULL); + *pp = pThis->pp; + RETiRet; +} + + +/* queryInterface function + * rgerhards, 2008-02-21 + */ +BEGINobjQueryInterface(ctok) +CODESTARTobjQueryInterface(ctok) + if(pIf->ifVersion != ctokCURR_IF_VERSION) { /* check for current version, increment on each change */ + ABORT_FINALIZE(RS_RET_INTERFACE_NOT_SUPPORTED); + } + + /* ok, we have the right interface, so let's fill it + * Please note that we may also do some backwards-compatibility + * work here (if we can support an older interface version - that, + * of course, also affects the "if" above). + */ + //xxxpIf->oID = OBJctok; + + pIf->Construct = ctokConstruct; + pIf->ConstructFinalize = ctokConstructFinalize; + pIf->Destruct = ctokDestruct; + pIf->Getpp = ctokGetpp; + pIf->GetToken = ctokGetToken; + pIf->UngetToken = ctokUngetToken; + pIf->Setpp = ctokSetpp; +finalize_it: +ENDobjQueryInterface(ctok) + + + +BEGINObjClassInit(ctok, 1, OBJ_IS_CORE_MODULE) /* class, version */ + /* request objects we use */ + CHKiRet(objUse(ctok_token, CORE_COMPONENT)); + CHKiRet(objUse(var, CORE_COMPONENT)); + + OBJSetMethodHandler(objMethod_CONSTRUCTION_FINALIZER, ctokConstructFinalize); +ENDObjClassInit(ctok) + +/* vi:set ai: + */ |