summaryrefslogtreecommitdiffstats
path: root/runtime/ctok.c
diff options
context:
space:
mode:
Diffstat (limited to 'runtime/ctok.c')
-rw-r--r--runtime/ctok.c593
1 files changed, 593 insertions, 0 deletions
diff --git a/runtime/ctok.c b/runtime/ctok.c
new file mode 100644
index 00000000..de2bd8a8
--- /dev/null
+++ b/runtime/ctok.c
@@ -0,0 +1,593 @@
+/* ctok.c - helper class to tokenize an input stream - which, surprisingly,
+ * currently does not work with streams but with strings. But that will
+ * probably change over time ;) This class was originally written to support
+ * the expression module but may evolve when (if) the expression module is
+ * expanded (or aggregated) by a full-fledged ctoken based config parser.
+ * Obviously, this class is used together with config files and not any other
+ * parse function.
+ *
+ * Module begun 2008-02-19 by Rainer Gerhards
+ *
+ * Copyright (C) 2008 by Rainer Gerhards and Adiscon GmbH.
+ *
+ * This file is part of the rsyslog runtime library.
+ *
+ * The rsyslog runtime library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * The rsyslog runtime library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the rsyslog runtime library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * A copy of the GPL can be found in the file "COPYING" in this distribution.
+ * A copy of the LGPL can be found in the file "COPYING.LESSER" in this distribution.
+ */
+
+#include "config.h"
+#include <stdlib.h>
+#include <ctype.h>
+#include <strings.h>
+#include <assert.h>
+
+#include "rsyslog.h"
+#include "template.h"
+#include "ctok.h"
+
+/* static data
+ * DEFobjStaticHelpers is a project macro whose expansion is not visible in
+ * this file (presumably the per-class static object data - confirm in the
+ * obj headers). The two DEFobjCurrIf() lines appear to define the interface
+ * handles through which this file calls into other classes: "ctok_token"
+ * (Construct(), ConstructFinalize(), Destruct()) and "var" (SetNumber(),
+ * SetString()). Both interfaces are requested via objUse() in the class
+ * initializer at the end of this file.
+ */
+DEFobjStaticHelpers
+DEFobjCurrIf(ctok_token)
+DEFobjCurrIf(var)
+
+
+/* Standard-Constructor
+ * The body between the BEGIN and END macros is intentionally empty: this
+ * file performs no member initialization beyond whatever the macros
+ * themselves generate.
+ * NOTE(review): pp and pUngotToken are therefore never explicitly set here;
+ * presumably the macro hands out zeroed memory - confirm, because
+ * ctokGetToken() tests pUngotToken against NULL.
+ */
+BEGINobjConstruct(ctok) /* be sure to specify the object type also in END macro! */
+ENDobjConstruct(ctok)
+
+
+/* ConstructionFinalizer
+ * A ctok object needs no second construction stage, so pThis is unused and
+ * the function simply returns the value DEFiRet set up (presumably
+ * RS_RET_OK - the macro expansion is not visible here).
+ * It is registered as the construction finalizer in the class initializer
+ * and exported through the interface as ConstructFinalize.
+ * rgerhards, 2008-01-09
+ */
+rsRetVal ctokConstructFinalize(ctok_t __attribute__((unused)) *pThis)
+{
+ DEFiRet;
+ RETiRet;
+}
+
+
+/* destructor for the ctok object
+ * No resources are released by the code in this block.
+ * NOTE(review): a token parked via ctokUngetToken() (pUngotToken) is not
+ * destructed here - confirm that callers always consume it via
+ * ctokGetToken() before destroying the tokenizer. Likewise pp is not freed;
+ * presumably the string it points into is owned by the caller - confirm.
+ */
+BEGINobjDestruct(ctok) /* be sure to specify the object type also in END and CODESTART macros! */
+CODESTARTobjDestruct(ctok)
+ /* nothing to free at present */
+ENDobjDestruct(ctok)
+
+
+/* Push the most recently read character back onto the input "stream".
+ *
+ * Only a single character may be pushed back, and only after at least one
+ * character has been read. As the "stream" is currently a plain in-memory
+ * string, ungetting is implemented by stepping the read pointer back by one
+ * position; the character value itself (c) is not needed and is ignored.
+ * Should real streams be used some day, the strm object is expected to
+ * provide this functionality itself. -- rgerhards, 2008-02-19
+ */
+static rsRetVal
+ctokUngetCharFromStream(ctok_t *pThis, uchar __attribute__((unused)) c)
+{
+	DEFiRet;
+
+	ISOBJ_TYPE_assert(pThis, ctok);
+
+	/* step back onto the character that was read last */
+	pThis->pp -= 1;
+
+	RETiRet;
+}
+
+
+/* Read the next character from the input "stream" (currently simply an
+ * in-memory string) and advance the read position.
+ *
+ * pc receives the character on success. The stream is considered exhausted,
+ * and RS_RET_EOS is returned without touching *pc or the read position, when
+ * the current character is either the string terminator or a '#' (begin of
+ * comment).
+ * rgerhards, 2008-02-19
+ */
+static rsRetVal
+ctokGetCharFromStream(ctok_t *pThis, uchar *pc)
+{
+	DEFiRet;
+	uchar chNext;
+
+	ISOBJ_TYPE_assert(pThis, ctok);
+	ASSERT(pc != NULL);
+
+	chNext = *pThis->pp;
+
+	/* end of string or begin of comment terminates the "stream" */
+	if(chNext == '\0' || chNext == '#') {
+		ABORT_FINALIZE(RS_RET_EOS);
+	}
+
+	*pc = chNext;
+	pThis->pp++;
+
+finalize_it:
+	RETiRet;
+}
+
+
+/* Advance the input "stream" past any leading whitespace.
+ *
+ * Characters are consumed until the first non-whitespace one is seen; that
+ * character is then pushed back so that the next read returns it. If the
+ * stream ends before a non-whitespace character is found, the error from
+ * ctokGetCharFromStream() (RS_RET_EOS) is returned and nothing is pushed back.
+ * rgerhards, 2008-02-19
+ */
+static rsRetVal
+ctokSkipWhitespaceFromStream(ctok_t *pThis)
+{
+	DEFiRet;
+	uchar chCurr;
+
+	ISOBJ_TYPE_assert(pThis, ctok);
+
+	/* read at least one character, keep reading while it is whitespace */
+	do {
+		CHKiRet(ctokGetCharFromStream(pThis, &chCurr));
+	} while(isspace(chCurr));
+
+	/* chCurr is the first non-whitespace character - hand it back */
+	CHKiRet(ctokUngetCharFromStream(pThis, chCurr));
+
+	dbgprintf("skipped whitepsace, stream now '%s'\n", pThis->pp);
+
+finalize_it:
+	RETiRet;
+}
+
+
+/* Get the next word from the input "stream" (currently just an in-memory
+ * string...). Leading whitespace is skipped first. A word is the run of
+ * alphanumeric characters, '_' and '-' starting at the current location.
+ *
+ * pWordBuf   receives the NUL-terminated word
+ * lenWordBuf size of pWordBuf in bytes, must be at least 1. If the word is
+ *            longer than the buffer, parsing stops once lenWordBuf-1
+ *            characters have been stored. A buffer of 128 bytes or more
+ *            should always be by far sufficient.
+ *
+ * The character that ended the word is pushed back onto the stream. If the
+ * stream ends right after the word, the word collected so far is returned
+ * with success (the next read will then report RS_RET_EOS); previously such a
+ * word was lost and the buffer was left without a terminator.
+ * On every return path after the whitespace skip - including errors - the
+ * buffer is NUL-terminated.
+ * -- rgerhards, 2008-02-19
+ */
+static rsRetVal
+ctokGetWordFromStream(ctok_t *pThis, uchar *pWordBuf, size_t lenWordBuf)
+{
+	DEFiRet;
+	uchar c;
+	int bHaveChars = 0; /* did we store at least one character? */
+
+	ISOBJ_TYPE_assert(pThis, ctok);
+	ASSERT(pWordBuf != NULL);
+	ASSERT(lenWordBuf > 0);
+
+	/* make sure the caller never sees an unterminated buffer */
+	*pWordBuf = '\0';
+
+	CHKiRet(ctokSkipWhitespaceFromStream(pThis));
+
+	iRet = ctokGetCharFromStream(pThis, &c);
+	while(iRet == RS_RET_OK && (isalnum(c) || c == '_' || c == '-') && lenWordBuf > 1) {
+		*pWordBuf++ = c;
+		*pWordBuf = '\0'; /* there is always space for this - see while() */
+		--lenWordBuf;
+		bHaveChars = 1;
+		iRet = ctokGetCharFromStream(pThis, &c);
+	}
+
+	if(iRet == RS_RET_OK) {
+		/* push back the char that we have read too much */
+		CHKiRet(ctokUngetCharFromStream(pThis, c));
+	} else if(iRet == RS_RET_EOS && bHaveChars) {
+		/* the stream ended together with the word: nothing was consumed
+		 * beyond it, so there is nothing to unget and the word is valid.
+		 */
+		iRet = RS_RET_OK;
+	}
+
+finalize_it:
+	RETiRet;
+}
+
+
+/* Read in a constant number.
+ * This is the "number" ABNF element. Supported notations:
+ *   - decimal:     first digit is not '0'
+ *   - octal:       leading '0'
+ *   - hexadecimal: leading "0x" (or "0X"); digits a-f in either case
+ *
+ * pToken->tok is set to ctok_NUMBER and the parsed value is stored into
+ * pToken->pVar via var.SetNumber().
+ *
+ * Parsing stops at the first character that is neither a decimal digit nor
+ * one of a-f; that character is pushed back onto the stream. A digit that is
+ * not valid for the detected base yields RS_RET_INVALID_OCTAL_DIGIT for octal
+ * numbers and RS_RET_INVALID_HEX_DIGIT otherwise (this includes a-f inside a
+ * decimal number, as before). If the stream ends right after the number, the
+ * number is returned successfully instead of being dropped with RS_RET_EOS.
+ * NOTE(review): no overflow check is done while accumulating the value.
+ * rgerhards, 2008-02-19
+ */
+static rsRetVal
+ctokGetNumber(ctok_t *pThis, ctok_token_t *pToken)
+{
+	DEFiRet;
+	number_t n = 0; /* the parsed number */
+	uchar c;
+	int valC;
+	int iBase;
+
+	ISOBJ_TYPE_assert(pThis, ctok);
+	ASSERT(pToken != NULL);
+
+	pToken->tok = ctok_NUMBER;
+
+	/* the caller guarantees there is at least one character (a digit) */
+	CHKiRet(ctokGetCharFromStream(pThis, &c));
+	if(c == '0') { /* octal? */
+		iBase = 8;
+		iRet = ctokGetCharFromStream(pThis, &c);
+		if(iRet == RS_RET_OK && (c == 'x' || c == 'X')) { /* nope, hex! */
+			iBase = 16;
+			iRet = ctokGetCharFromStream(pThis, &c);
+		}
+	} else {
+		iBase = 10;
+	}
+
+	/* accumulate digits; every character is lower-cased the same way so
+	 * that upper- and lower-case hex digits are treated identically.
+	 */
+	while(iRet == RS_RET_OK) {
+		c = tolower(c);
+		if(isdigit(c)) {
+			valC = c - '0';
+		} else if(c >= 'a' && c <= 'f') {
+			valC = c - 'a' + 10;
+		} else {
+			break; /* not part of the number */
+		}
+
+		if(valC >= iBase) {
+			if(iBase == 8) {
+				ABORT_FINALIZE(RS_RET_INVALID_OCTAL_DIGIT);
+			} else {
+				ABORT_FINALIZE(RS_RET_INVALID_HEX_DIGIT);
+			}
+		}
+		/* we now have the next value and know it is right */
+		n = n * iBase + valC;
+		iRet = ctokGetCharFromStream(pThis, &c);
+	}
+
+	if(iRet == RS_RET_OK) {
+		/* we need to unget the character that made the loop terminate */
+		CHKiRet(ctokUngetCharFromStream(pThis, c));
+	} else if(iRet == RS_RET_EOS) {
+		/* the number is the last thing in the stream - that is fine and
+		 * nothing was read beyond it.
+		 */
+		iRet = RS_RET_OK;
+	} else {
+		FINALIZE;
+	}
+
+	CHKiRet(var.SetNumber(pToken->pVar, n));
+
+finalize_it:
+	RETiRet;
+}
+
+
+/* Read in a variable.
+ * This covers both msgvar and sysvar from the ABNF. The caller has already
+ * consumed the leading '$'. If the next character is a second '$', the token
+ * is a system variable (ctok_SYSVAR), otherwise a message variable
+ * (ctok_MSGVAR).
+ *
+ * The variable name is everything up to (not including) the next whitespace
+ * character or the end of the stream; it is stored lower-cased into
+ * pToken->pVar via var.SetString(), which takes over the string object.
+ * The terminating whitespace character is consumed.
+ *
+ * On failure the partially built string is destructed here.
+ * rgerhards, 2008-02-19
+ */
+static rsRetVal
+ctokGetVar(ctok_t *pThis, ctok_token_t *pToken)
+{
+	DEFiRet;
+	uchar c;
+	cstr_t *pstrVal = NULL; /* NULL so that the cleanup code is safe on early errors */
+
+	ISOBJ_TYPE_assert(pThis, ctok);
+	ASSERT(pToken != NULL);
+
+	CHKiRet(ctokGetCharFromStream(pThis, &c));
+
+	if(c == '$') { /* second dollar, we have a system variable */
+		pToken->tok = ctok_SYSVAR;
+		CHKiRet(ctokGetCharFromStream(pThis, &c)); /* "eat" it... */
+	} else {
+		pToken->tok = ctok_MSGVAR;
+	}
+
+	CHKiRet(rsCStrConstruct(&pstrVal));
+	/* this loop is quite simple, a variable name is terminated by whitespace
+	 * (or by the end of the stream).
+	 */
+	while(iRet == RS_RET_OK && !isspace(c)) {
+		CHKiRet(rsCStrAppendChar(pstrVal, tolower(c)));
+		iRet = ctokGetCharFromStream(pThis, &c);
+	}
+	if(iRet == RS_RET_EOS) {
+		/* name ran up to the end of the stream - still a valid name */
+		iRet = RS_RET_OK;
+	} else if(iRet != RS_RET_OK) {
+		FINALIZE;
+	}
+	CHKiRet(rsCStrFinish(pstrVal));
+
+	CHKiRet(var.SetString(pToken->pVar, pstrVal));
+	pstrVal = NULL; /* ownership was handed over */
+
+finalize_it:
+	if(iRet != RS_RET_OK) {
+		if(pstrVal != NULL) {
+			rsCStrDestruct(&pstrVal);
+		}
+	}
+
+	RETiRet;
+}
+
+
+/* Read in a simple string (simpstr in ABNF).
+ * The caller has already consumed the opening single quote. Characters are
+ * collected up to the closing single quote, which is consumed but not stored.
+ * A backslash escapes the following character: the backslash itself is
+ * dropped and the next character is stored verbatim (so \' and \\ yield a
+ * quote and a backslash, respectively).
+ *
+ * pToken->tok is set to ctok_SIMPSTR and the string is stored into
+ * pToken->pVar via var.SetString(), which takes over the string object.
+ *
+ * If the stream ends before the closing quote, the error from
+ * ctokGetCharFromStream() (RS_RET_EOS) is returned and the partially built
+ * string is destructed.
+ * NOTE(review): ctokGetCharFromStream() treats '#' as end of stream, so a
+ * '#' inside a quoted string currently ends parsing with RS_RET_EOS.
+ * rgerhards, 2008-02-19
+ */
+static rsRetVal
+ctokGetSimpStr(ctok_t *pThis, ctok_token_t *pToken)
+{
+	DEFiRet;
+	uchar c;
+	int bInEsc = 0;
+	cstr_t *pstrVal = NULL; /* NULL so that the cleanup code is safe on early errors */
+
+	ISOBJ_TYPE_assert(pThis, ctok);
+	ASSERT(pToken != NULL);
+
+	pToken->tok = ctok_SIMPSTR;
+
+	CHKiRet(rsCStrConstruct(&pstrVal));
+	CHKiRet(ctokGetCharFromStream(pThis, &c));
+	/* while we are in escape mode (had a backslash), no sequence
+	 * terminates the loop. If outside, it is terminated by a single quote.
+	 */
+	while(bInEsc || c != '\'') {
+		if(bInEsc) {
+			CHKiRet(rsCStrAppendChar(pstrVal, c));
+			bInEsc = 0;
+		} else {
+			if(c == '\\') {
+				bInEsc = 1;
+			} else {
+				CHKiRet(rsCStrAppendChar(pstrVal, c));
+			}
+		}
+		CHKiRet(ctokGetCharFromStream(pThis, &c));
+	}
+	CHKiRet(rsCStrFinish(pstrVal));
+
+	CHKiRet(var.SetString(pToken->pVar, pstrVal));
+	pstrVal = NULL; /* ownership was handed over */
+
+finalize_it:
+	if(iRet != RS_RET_OK) {
+		if(pstrVal != NULL) {
+			rsCStrDestruct(&pstrVal);
+		}
+	}
+
+	RETiRet;
+}
+
+
+/* Hand a token back to the tokenizer.
+ *
+ * The token is parked in the object and returned by the next call to
+ * ctokGetToken() without any parsing. There is room for exactly one parked
+ * token: ungetting a second one before the first has been fetched again
+ * overwrites (and thereby loses) the first. Doing so is a programming error
+ * and is caught by an assertion in debug builds.
+ * rgerhards, 2008-02-20
+ */
+static rsRetVal
+ctokUngetToken(ctok_t *pThis, ctok_token_t *pToken)
+{
+	DEFiRet;
+
+	ISOBJ_TYPE_assert(pThis, ctok);
+	ASSERT(pToken != NULL);
+	ASSERT(pThis->pUngotToken == NULL); /* slot must be free */
+
+	pThis->pUngotToken = pToken;
+
+	RETiRet;
+}
+
+
+/* Skip an inline comment (just like a C comment).
+ *
+ * The caller has already consumed the opening slash-asterisk pair. Characters
+ * are read and discarded until an asterisk immediately followed by a slash
+ * has been consumed. If the stream ends before that, the error from
+ * ctokGetCharFromStream() is returned.
+ * rgerhards, 2008-02-20
+ */
+static rsRetVal
+ctokSkipInlineComment(ctok_t *pThis)
+{
+	DEFiRet;
+	uchar chCurr;
+	int bPrevWasStar = 0; /* was the previously read character a '*'? */
+
+	ISOBJ_TYPE_assert(pThis, ctok);
+
+	for(;;) {
+		CHKiRet(ctokGetCharFromStream(pThis, &chCurr));
+		if(bPrevWasStar && chCurr == '/') {
+			break; /* end-of-comment marker consumed */
+		}
+		bPrevWasStar = (chCurr == '*');
+	}
+
+finalize_it:
+	RETiRet;
+}
+
+
+
+/* Get the *next* token from the input stream. This parses the next token and
+ * ignores any whitespace and inline comments in between. End of stream is
+ * communicated via iRet (RS_RET_EOS).
+ *
+ * If a token has been handed back via ctokUngetToken(), that token is
+ * returned without any parsing. Otherwise a new token object is constructed
+ * and filled in.
+ *
+ * ppToken receives the token on success. The returned token must either be
+ * destructed by the caller OR be passed back to ctokUngetToken(). On failure
+ * *ppToken is not written and the token object created here (if any) is
+ * destructed.
+ *
+ * Double-quoted (template) strings are not yet supported and yield
+ * RS_RET_NOT_IMPLEMENTED.
+ * rgerhards, 2008-02-19
+ */
+static rsRetVal
+ctokGetToken(ctok_t *pThis, ctok_token_t **ppToken)
+{
+	DEFiRet;
+	ctok_token_t *pToken = NULL; /* NULL so that the cleanup code is safe if Construct fails */
+	uchar c;
+	uchar szWord[128];
+	int bRetry = 0; /* retry parse? Only needed for inline comments... */
+
+	ISOBJ_TYPE_assert(pThis, ctok);
+	ASSERT(ppToken != NULL);
+
+	/* first check if we have an ungotten token and, if so, provide that
+	 * one back (without any parsing). -- rgerhards, 2008-02-20
+	 */
+	if(pThis->pUngotToken != NULL) {
+		*ppToken = pThis->pUngotToken;
+		pThis->pUngotToken = NULL;
+		FINALIZE;
+	}
+
+	/* setup the stage - create our token */
+	CHKiRet(ctok_token.Construct(&pToken));
+	CHKiRet(ctok_token.ConstructFinalize(pToken));
+
+	/* find the next token. We may loop when we have inline comments */
+	do {
+		bRetry = 0;
+		CHKiRet(ctokSkipWhitespaceFromStream(pThis));
+		CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a character */
+		switch(c) {
+		case '=': /* == */
+			CHKiRet(ctokGetCharFromStream(pThis, &c));
+			pToken->tok = (c == '=')? ctok_CMP_EQ : ctok_INVALID;
+			break;
+		case '!': /* != */
+			CHKiRet(ctokGetCharFromStream(pThis, &c));
+			pToken->tok = (c == '=')? ctok_CMP_NEQ : ctok_INVALID;
+			break;
+		case '<': /* <, <=, <> */
+			CHKiRet(ctokGetCharFromStream(pThis, &c));
+			if(c == '=') {
+				pToken->tok = ctok_CMP_LTEQ;
+			} else if(c == '>') {
+				pToken->tok = ctok_CMP_NEQ;
+			} else {
+				pToken->tok = ctok_CMP_LT;
+				/* c belongs to the next token, do not swallow it */
+				CHKiRet(ctokUngetCharFromStream(pThis, c));
+			}
+			break;
+		case '>': /* >, >= */
+			CHKiRet(ctokGetCharFromStream(pThis, &c));
+			if(c == '=') {
+				pToken->tok = ctok_CMP_GTEQ;
+			} else {
+				pToken->tok = ctok_CMP_GT;
+				/* c belongs to the next token, do not swallow it */
+				CHKiRet(ctokUngetCharFromStream(pThis, c));
+			}
+			break;
+		case '+':
+			pToken->tok = ctok_PLUS;
+			break;
+		case '-':
+			pToken->tok = ctok_MINUS;
+			break;
+		case '*':
+			pToken->tok = ctok_TIMES;
+			break;
+		case '/': /* division, or start of an inline (C-style) comment */
+			CHKiRet(ctokGetCharFromStream(pThis, &c));
+			if(c == '*') {
+				/* we have a comment and need to skip it; an
+				 * unterminated comment is reported to the caller
+				 */
+				CHKiRet(ctokSkipInlineComment(pThis));
+				bRetry = 1;
+			} else {
+				CHKiRet(ctokUngetCharFromStream(pThis, c)); /* put back, not processed */
+				pToken->tok = ctok_DIV;
+			}
+			break;
+		case '%':
+			pToken->tok = ctok_MOD;
+			break;
+		case '(':
+			pToken->tok = ctok_LPAREN;
+			break;
+		case ')':
+			pToken->tok = ctok_RPAREN;
+			break;
+		case ',':
+			pToken->tok = ctok_COMMA;
+			break;
+		case '&':
+			pToken->tok = ctok_STRADD;
+			break;
+		case '$':
+			CHKiRet(ctokGetVar(pThis, pToken));
+			break;
+		case '\'': /* simple string, this is somewhat more elaborate */
+			CHKiRet(ctokGetSimpStr(pThis, pToken));
+			break;
+		case '"':
+			/* TODO: template string parser */
+			ABORT_FINALIZE(RS_RET_NOT_IMPLEMENTED);
+			break;
+		default:
+			CHKiRet(ctokUngetCharFromStream(pThis, c)); /* push back, we need it in any case */
+			if(isdigit(c)) {
+				CHKiRet(ctokGetNumber(pThis, pToken));
+			} else { /* now we check if we have a multi-char sequence */
+				CHKiRet(ctokGetWordFromStream(pThis, szWord, sizeof(szWord)/sizeof(uchar)));
+				if(!strcasecmp((char*)szWord, "and")) {
+					pToken->tok = ctok_AND;
+				} else if(!strcasecmp((char*)szWord, "or")) {
+					pToken->tok = ctok_OR;
+				} else if(!strcasecmp((char*)szWord, "not")) {
+					pToken->tok = ctok_NOT;
+				} else if(!strcasecmp((char*)szWord, "contains")) {
+					pToken->tok = ctok_CMP_CONTAINS;
+				} else if(!strcasecmp((char*)szWord, "contains_i")) {
+					pToken->tok = ctok_CMP_CONTAINSI;
+				} else if(!strcasecmp((char*)szWord, "startswith")) {
+					pToken->tok = ctok_CMP_STARTSWITH;
+				} else if(!strcasecmp((char*)szWord, "startswith_i")) {
+					pToken->tok = ctok_CMP_STARTSWITHI;
+				} else if(!strcasecmp((char*)szWord, "then")) {
+					pToken->tok = ctok_THEN;
+				} else {
+					/* finally, we check if it is a function */
+					CHKiRet(ctokGetCharFromStream(pThis, &c));
+					if(c == '(') {
+						/* push c back, higher level parser needs it */
+						CHKiRet(ctokUngetCharFromStream(pThis, c));
+						pToken->tok = ctok_FUNCTION;
+						/* TODO: fill function name */
+					} else { /* give up... */
+						pToken->tok = ctok_INVALID;
+					}
+				}
+			}
+			break;
+		}
+	} while(bRetry); /* warning: do ... while()! */
+
+	*ppToken = pToken;
+	dbgoprint((obj_t*) pToken, "token: %d\n", pToken->tok);
+
+finalize_it:
+	if(iRet != RS_RET_OK) {
+		if(pToken != NULL)
+			ctok_token.Destruct(&pToken);
+	}
+
+	RETiRet;
+}
+
+
+/* property set methods */
+/* simple ones first */
+/* Setter for the "pp" member (type uchar*), i.e. the current read position
+ * inside the string being tokenized. The macro expansion is not visible
+ * here; presumably it generates ctokSetpp(), which the interface exports as
+ * Setpp.
+ */
+DEFpropSetMeth(ctok, pp, uchar*)
+
+/* Return the current position of pp - most important as currently we do only
+ * partial parsing, so the rest must know where to start from...
+ *
+ * pp receives the tokenizer's current read pointer; it points into the
+ * string being tokenized, no copy is made.
+ * rgerhards, 2008-02-19
+ */
+static rsRetVal
+ctokGetpp(ctok_t *pThis, uchar **pp)
+{
+	DEFiRet;
+
+	/* validate the object like every other method of this class does */
+	ISOBJ_TYPE_assert(pThis, ctok);
+	ASSERT(pp != NULL);
+
+	*pp = pThis->pp;
+
+	RETiRet;
+}
+
+
+/* queryInterface function
+ * Fills the caller-provided interface structure (pIf) with the entry points
+ * of this class. Only the current interface version is served; any other
+ * version is rejected with RS_RET_INTERFACE_NOT_SUPPORTED and pIf is left
+ * untouched.
+ * rgerhards, 2008-02-21
+ */
+BEGINobjQueryInterface(ctok)
+CODESTARTobjQueryInterface(ctok)
+	/* check for current version, increment ctokCURR_IF_VERSION on each
+	 * interface change
+	 */
+	if(pIf->ifVersion != ctokCURR_IF_VERSION) {
+		ABORT_FINALIZE(RS_RET_INTERFACE_NOT_SUPPORTED);
+	}
+
+	/* The version matches, so populate the interface. Backwards
+	 * compatibility with older interface versions could also be handled
+	 * here - which would then require relaxing the check above.
+	 */
+	/* disabled in the original: xxxpIf->oID = OBJctok; */
+
+	/* object lifecycle */
+	pIf->Construct = ctokConstruct;
+	pIf->ConstructFinalize = ctokConstructFinalize;
+	pIf->Destruct = ctokDestruct;
+
+	/* read position accessors */
+	pIf->Setpp = ctokSetpp;
+	pIf->Getpp = ctokGetpp;
+
+	/* tokenizer proper */
+	pIf->GetToken = ctokGetToken;
+	pIf->UngetToken = ctokUngetToken;
+finalize_it:
+ENDobjQueryInterface(ctok)
+
+
+
+/* Class initializer for ctok (class version 1, flagged OBJ_IS_CORE_MODULE).
+ * It requests the interfaces of the two classes this file calls into
+ * (ctok_token and var) and registers ctokConstructFinalize as the
+ * construction finalizer. A failing objUse() aborts initialization via
+ * CHKiRet.
+ */
+BEGINObjClassInit(ctok, 1, OBJ_IS_CORE_MODULE) /* class, version */
+ /* request objects we use */
+ CHKiRet(objUse(ctok_token, CORE_COMPONENT));
+ CHKiRet(objUse(var, CORE_COMPONENT));
+
+ /* hook our finalizer into the generic object method table */
+ OBJSetMethodHandler(objMethod_CONSTRUCTION_FINALIZER, ctokConstructFinalize);
+ENDObjClassInit(ctok)
+
+/* vi:set ai:
+ */