summaryrefslogtreecommitdiffstats
path: root/ctok.c
diff options
context:
space:
mode:
authorRainer Gerhards <rgerhards@adiscon.com>2008-02-19 16:16:09 +0000
committerRainer Gerhards <rgerhards@adiscon.com>2008-02-19 16:16:09 +0000
commitb5a09481faa2eda03b568839ed724970bc8a1adc (patch)
treec0f12e58fedc0b90bfdbafd00f5c2c2cd5e6ce01 /ctok.c
parent47aab374d40c05cbb7a4ceb2a4236cb65a399c3a (diff)
downloadrsyslog-b5a09481faa2eda03b568839ed724970bc8a1adc.tar.gz
rsyslog-b5a09481faa2eda03b568839ed724970bc8a1adc.tar.xz
rsyslog-b5a09481faa2eda03b568839ed724970bc8a1adc.zip
implemented initial tokenizer (stage work for expr parser)
Diffstat (limited to 'ctok.c')
-rw-r--r--ctok.c283
1 files changed, 282 insertions, 1 deletions
diff --git a/ctok.c b/ctok.c
index 917b3175..b6301ff6 100644
--- a/ctok.c
+++ b/ctok.c
@@ -30,6 +30,8 @@
#include "config.h"
#include <stdlib.h>
+#include <ctype.h>
+#include <strings.h>
#include <assert.h>
#include "rsyslog.h"
@@ -49,7 +51,7 @@ ENDobjConstruct(ctok)
/* ConstructionFinalizer
* rgerhards, 2008-01-09
*/
-rsRetVal ctokConstructFinalize(ctok_t *pThis)
+rsRetVal ctokConstructFinalize(ctok_t __attribute__((unused)) *pThis)
{
DEFiRet;
RETiRet;
@@ -62,6 +64,285 @@ CODESTARTobjDestruct(ctok)
/* ... then free resources */
ENDobjDestruct(ctok)
+
+/* Push the most recently read character back onto the input "stream".
+ * Only a single character of pushback is supported, and this function may
+ * only be called after at least one character has been read. The stream is
+ * currently an in-memory string, so pushing back simply means moving the
+ * read pointer one position backwards; the character argument is not used.
+ * If we work with real streams (some time), the strm object will handle
+ * the functionality itself. -- rgerhards, 2008-02-19
+ */
+static rsRetVal
+ctokUngetCharFromStream(ctok_t *pThis, uchar __attribute__((unused)) c)
+{
+	DEFiRet;
+
+	ISOBJ_TYPE_assert(pThis, ctok);
+
+	/* step back onto the character that was handed out last */
+	pThis->pp -= 1;
+
+	RETiRet;
+}
+
+
+/* Fetch the next character from the input "stream" (currently just an
+ * in-memory, NUL-terminated string). On success the character is stored in
+ * *pc and the read pointer is advanced by one. When the terminating NUL is
+ * reached, RS_RET_EOS is returned, *pc is left untouched and the read
+ * pointer does not move. -- rgerhards, 2008-02-19
+ */
+static rsRetVal
+ctokGetCharFromStream(ctok_t *pThis, uchar *pc)
+{
+	DEFiRet;
+
+	ISOBJ_TYPE_assert(pThis, ctok);
+	ASSERT(pc != NULL);
+
+	/* guard: nothing left to read */
+	if(*pThis->pp == '\0') {
+		ABORT_FINALIZE(RS_RET_EOS);
+	}
+
+	/* hand out the current character, then move past it */
+	*pc = *pThis->pp++;
+
+finalize_it:
+	RETiRet;
+}
+
+
+/* Advance the input "stream" past any leading whitespace, so that the next
+ * character read is the first non-whitespace one. If the end of the stream
+ * is reached while skipping, the error from the character reader
+ * (RS_RET_EOS) is passed on to the caller.
+ * rgerhards, 2008-02-19
+ */
+static rsRetVal
+ctokSkipWhitespaceFromStream(ctok_t *pThis)
+{
+	DEFiRet;
+	uchar ch;
+
+	ISOBJ_TYPE_assert(pThis, ctok);
+
+	/* consume characters up to and including the first non-whitespace */
+	do {
+		CHKiRet(ctokGetCharFromStream(pThis, &ch));
+	} while(isspace(ch));
+
+	/* the character that ended the loop belongs to the caller: put it back */
+	CHKiRet(ctokUngetCharFromStream(pThis, ch));
+
+dbgprintf("skipped whitepsace, stream now '%s'\n", pThis->pp);
+finalize_it:
+	RETiRet;
+}
+
+
+/* Get the next word from the input "stream" (currently just an in-memory
+ * string...). A word is anything between whitespace. Leading whitespace is
+ * skipped first. The word is copied into pWordBuf, which holds lenWordBuf
+ * bytes including the terminating NUL. If the word is longer than the
+ * buffer, copying stops once the buffer is full and the rest of the word
+ * stays in the stream. A buffer of 128 bytes or more should always be by
+ * far sufficient.
+ *
+ * The whitespace character that ends a word is consumed. A word that is
+ * ended by the end of the stream is returned with RS_RET_OK; the
+ * end-of-stream condition is then reported by the next read. If no word
+ * could be read at all, the error is returned and pWordBuf holds an empty
+ * string.
+ * rgerhards, 2008-02-19
+ */
+static rsRetVal
+ctokGetWordFromStream(ctok_t *pThis, uchar *pWordBuf, size_t lenWordBuf)
+{
+	DEFiRet;
+	uchar c;
+	uchar *pDst = pWordBuf; /* next free position in the caller's buffer */
+
+	ISOBJ_TYPE_assert(pThis, ctok);
+	ASSERT(pWordBuf != NULL);
+	ASSERT(lenWordBuf > 0);
+
+	/* make sure the buffer is a valid string on every exit path */
+	*pDst = '\0';
+
+	CHKiRet(ctokSkipWhitespaceFromStream(pThis));
+
+	/* Only read a character when there is room to store it (one byte is
+	 * reserved for the terminator), so that nothing is dropped when the
+	 * buffer fills up.
+	 */
+	while(lenWordBuf > 1) {
+		iRet = ctokGetCharFromStream(pThis, &c);
+		if(iRet == RS_RET_EOS && pDst != pWordBuf) {
+			/* end of stream terminates the word we already collected */
+			iRet = RS_RET_OK;
+			break;
+		}
+		if(iRet != RS_RET_OK) {
+			goto finalize_it;
+		}
+		if(isspace(c)) {
+			break;
+		}
+		*pDst++ = c;
+		--lenWordBuf;
+	}
+	*pDst = '\0'; /* there is always space for this - see while() */
+
+dbgprintf("end ctokGetWorkFromStream, stream now '%s'\n", pThis->pp);
+finalize_it:
+	RETiRet;
+}
+
+
+#if 0
+/* Word-based variant of the tokenizer. It is compiled out (#if 0) and kept
+ * for reference only; the active implementation follows below.
+ *
+ * Get the next token from the input stream. This reads the next
+ * whitespace-delimited word and maps it, ignoring case, to a token. Words
+ * that are not recognized yield ctok_INVALID. End of stream is communicated
+ * via iRet.
+ * rgerhards, 2008-02-19
+ */
+rsRetVal
+ctokGetNextToken(ctok_t *pThis, ctok_token_t *pToken)
+{
+	/* spelling -> token; "<>" is an alias for "!=" for the non-C folks */
+	static const struct {
+		const char *pszName;
+		ctok_token_t token;
+	} wordTab[] = {
+		{ "or",  ctok_OR },
+		{ "and", ctok_AND },
+		{ "+",   ctok_PLUS },
+		{ "-",   ctok_MINUS },
+		{ "*",   ctok_TIMES },
+		{ "/",   ctok_DIV },
+		{ "%",   ctok_MOD },
+		{ "not", ctok_NOT },
+		{ "(",   ctok_LPAREN },
+		{ ")",   ctok_RPAREN },
+		{ ",",   ctok_COMMA },
+		{ "$",   ctok_DOLLAR },
+		{ "'",   ctok_QUOTE },
+		{ "\"",  ctok_DBL_QUOTE },
+		{ "==",  ctok_CMP_EQ },
+		{ "!=",  ctok_CMP_NEQ },
+		{ "<>",  ctok_CMP_NEQ },
+		{ "<",   ctok_CMP_LT },
+		{ ">",   ctok_CMP_GT },
+		{ "<=",  ctok_CMP_LTEQ },
+		{ ">=",  ctok_CMP_GTEQ }
+	};
+	DEFiRet;
+	uchar pszWord[128];
+	size_t i;
+
+	ISOBJ_TYPE_assert(pThis, ctok);
+	ASSERT(pToken != NULL);
+
+	CHKiRet(ctokGetWordFromStream(pThis, pszWord, sizeof(pszWord)/sizeof(uchar)));
+
+	/* now recognize words... strcasecmp() returns 0 on a match */
+	*pToken = ctok_INVALID;
+	for(i = 0 ; i < sizeof(wordTab)/sizeof(wordTab[0]) ; ++i) {
+		if(strcasecmp((char*)pszWord, wordTab[i].pszName) == 0) {
+			*pToken = wordTab[i].token;
+			break;
+		}
+	}
+
+RUNLOG_VAR("%d", *pToken);
+
+finalize_it:
+	RETiRet;
+}
+#endif
+
+
+/* Read one lookahead character for the tokenizer. If a character is
+ * available, it is stored in *pc and *pbGotChar is set to 1. At end of
+ * stream, *pc is set to '\0', *pbGotChar is set to 0 and RS_RET_OK is
+ * returned, so that the caller can still finish the token it is working on.
+ * Any other error is passed through unchanged.
+ */
+static rsRetVal
+ctokGetLookaheadChar(ctok_t *pThis, uchar *pc, int *pbGotChar)
+{
+	DEFiRet;
+
+	ISOBJ_TYPE_assert(pThis, ctok);
+	ASSERT(pc != NULL);
+	ASSERT(pbGotChar != NULL);
+
+	*pbGotChar = 0;
+	iRet = ctokGetCharFromStream(pThis, pc);
+	if(iRet == RS_RET_EOS) {
+		*pc = '\0';
+		iRet = RS_RET_OK;
+	} else if(iRet == RS_RET_OK) {
+		*pbGotChar = 1;
+	}
+
+	RETiRet;
+}
+
+
+/* Get the next token from the input stream. This parses the next token and
+ * ignores any whitespace in between. End of stream is communicated via iRet;
+ * in that case *pToken is not modified.
+ *
+ * Recognized tokens: the keywords "or", "and" and "not" (lower case only),
+ * the comparison operators ==, !=, <>, <, <=, >, >= and the single
+ * characters + - * / % ( ) , $ ' ". Everything else yields ctok_INVALID.
+ * Keywords are matched character by character without checking what
+ * follows them, and the characters read while trying to match a keyword or
+ * a two-character operator are not put back if the match fails.
+ *
+ * For '<' and '>' one character of lookahead is needed to tell the one- and
+ * two-character operators apart. If that character is not part of the
+ * operator, it is put back into the stream; if the stream ends right after
+ * '<' or '>', the one-character operator is returned.
+ * rgerhards, 2008-02-19
+ */
+rsRetVal
+ctokGetNextToken(ctok_t *pThis, ctok_token_t *pToken)
+{
+	DEFiRet;
+	uchar c;
+	int bGotChar; /* did the lookahead deliver a character? */
+
+	ISOBJ_TYPE_assert(pThis, ctok);
+	ASSERT(pToken != NULL);
+
+	CHKiRet(ctokSkipWhitespaceFromStream(pThis));
+
+	CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a character */
+	switch(c) {
+	case 'o': /* or */
+		CHKiRet(ctokGetCharFromStream(pThis, &c));
+		*pToken = (c == 'r')? ctok_OR : ctok_INVALID;
+		break;
+	case 'a': /* and */
+		CHKiRet(ctokGetCharFromStream(pThis, &c));
+		if(c == 'n') {
+			CHKiRet(ctokGetCharFromStream(pThis, &c));
+			*pToken = (c == 'd')? ctok_AND : ctok_INVALID;
+		} else {
+			*pToken = ctok_INVALID;
+		}
+		break;
+	case 'n': /* not */
+		CHKiRet(ctokGetCharFromStream(pThis, &c));
+		if(c == 'o') {
+			CHKiRet(ctokGetCharFromStream(pThis, &c));
+			*pToken = (c == 't')? ctok_NOT : ctok_INVALID;
+		} else {
+			*pToken = ctok_INVALID;
+		}
+		break;
+	case '=': /* == */
+		CHKiRet(ctokGetCharFromStream(pThis, &c));
+		*pToken = (c == '=')? ctok_CMP_EQ : ctok_INVALID;
+		break;
+	case '!': /* != */
+		CHKiRet(ctokGetCharFromStream(pThis, &c));
+		*pToken = (c == '=')? ctok_CMP_NEQ : ctok_INVALID;
+		break;
+	case '<': /* <, <=, <> */
+		CHKiRet(ctokGetLookaheadChar(pThis, &c, &bGotChar));
+		if(bGotChar && c == '=') {
+			*pToken = ctok_CMP_LTEQ;
+		} else if(bGotChar && c == '>') {
+			*pToken = ctok_CMP_NEQ;
+		} else {
+			/* lookahead belongs to the next token - do not swallow it */
+			if(bGotChar) {
+				CHKiRet(ctokUngetCharFromStream(pThis, c));
+			}
+			*pToken = ctok_CMP_LT;
+		}
+		break;
+	case '>': /* >, >= */
+		CHKiRet(ctokGetLookaheadChar(pThis, &c, &bGotChar));
+		if(bGotChar && c == '=') {
+			*pToken = ctok_CMP_GTEQ;
+		} else {
+			/* lookahead belongs to the next token - do not swallow it */
+			if(bGotChar) {
+				CHKiRet(ctokUngetCharFromStream(pThis, c));
+			}
+			*pToken = ctok_CMP_GT;
+		}
+		break;
+	case '+':
+		*pToken = ctok_PLUS;
+		break;
+	case '-':
+		*pToken = ctok_MINUS;
+		break;
+	case '*':
+		*pToken = ctok_TIMES;
+		break;
+	case '/':
+		*pToken = ctok_DIV;
+		break;
+	case '%':
+		*pToken = ctok_MOD;
+		break;
+	case '(':
+		*pToken = ctok_LPAREN;
+		break;
+	case ')':
+		*pToken = ctok_RPAREN;
+		break;
+	case ',':
+		*pToken = ctok_COMMA;
+		break;
+	case '$':
+		*pToken = ctok_DOLLAR;
+		break;
+	case '\'':
+		*pToken = ctok_QUOTE;
+		break;
+	case '"':
+		*pToken = ctok_DBL_QUOTE;
+		break;
+	default:
+		*pToken = ctok_INVALID;
+		break;
+	}
+
+RUNLOG_VAR("%d", *pToken);
+
+finalize_it:
+	RETiRet;
+}
+
+
/* property set methods */
/* simple ones first */
DEFpropSetMeth(ctok, pp, uchar*)