/* cfgtok.c - helper class to tokenize an input stream - which, surprisingly,
 * currently does not work with streams but with strings. But that will
 * probably change over time ;) This class was originally written to support
 * the expression module but may evolve when (if) the expression module is
 * expanded (or aggregated) by a full-fledged ctoken-based config parser.
 * Obviously, this class is meant to be used together with config files and
 * not with any other parse function.
 *
 * Module begun 2008-02-19 by Rainer Gerhards
 *
 * Copyright 2008 Rainer Gerhards and Adiscon GmbH.
 *
 * This file is part of rsyslog.
 *
 * Rsyslog is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Rsyslog is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Rsyslog.  If not, see <http://www.gnu.org/licenses/>.
 *
 * A copy of the GPL can be found in the file "COPYING" in this distribution.
 */

#include "config.h"
#include <stdlib.h>
#include <ctype.h>
#include <strings.h>
#include <assert.h>

#include "rsyslog.h"
#include "template.h"
#include "ctok.h"

/* static data
 * NOTE(review): DEFobjStaticHelpers is a project macro whose expansion is not
 * visible in this file; presumably it defines the file-static helper data the
 * object framework needs for this class - confirm against its definition.
 */
DEFobjStaticHelpers


/* Standard-Constructor
 * The constructor body is intentionally empty: no ctok-specific
 * initialization statements are placed between the BEGIN and END macros.
 * NOTE(review): everything else (allocation, default member values) is
 * whatever the framework macros generate - confirm against their definition.
 */
BEGINobjConstruct(ctok) /* be sure to specify the object type also in END macro! */
ENDobjConstruct(ctok)


/* ConstructionFinalizer
 * Registered as the class' objMethod_CONSTRUCTION_FINALIZER handler in the
 * class initializer at the end of this file. There is currently nothing to
 * finalize for a ctok object, so pThis is unused and the function simply
 * returns the status set up by DEFiRet (presumably RS_RET_OK - confirm
 * against the macro definition).
 * rgerhards, 2008-01-09
 */
rsRetVal ctokConstructFinalize(ctok_t __attribute__((unused)) *pThis)
{
	DEFiRet;
	RETiRet;
}


/* destructor for the ctok object
 * No ctok-specific cleanup statements are present between the macros.
 * NOTE(review): a token handed to ctokUngetToken() and not yet fetched again
 * is still referenced by pUngotToken and is not released here - confirm
 * whether that can happen in practice and who owns the token in that case.
 */
BEGINobjDestruct(ctok) /* be sure to specify the object type also in END and CODESTART macros! */
CODESTARTobjDestruct(ctok)
	/* ... then free resources */
ENDobjDestruct(ctok)


/* Push the most recently read character back onto the input "stream".
 * At most one character can be ungotten, and this function may only be
 * called after at least one character has been read. As the "stream" is
 * currently just an in-memory string, all we need to do is step the read
 * pointer back by one position; the character itself is therefore not
 * needed and the parameter is ignored. If we work with real streams (some
 * time), the strm object will handle the functionality itself.
 * -- rgerhards, 2008-02-19
 */
static rsRetVal
ctokUngetCharFromStream(ctok_t *pThis, uchar __attribute__((unused)) c)
{
	DEFiRet;

	ISOBJ_TYPE_assert(pThis, ctok);

	/* one step back, so the same char is delivered on the next read */
	pThis->pp -= 1;

	RETiRet;
}


/* Fetch the next character from the input "stream" (which currently is
 * nothing but an in-memory string) and advance the read position.
 * The stream is considered exhausted when either the terminating NUL or
 * a '#' (begin of comment) is found at the current position. In that case
 * RS_RET_EOS is returned, *pc is left untouched and the read position is
 * not moved.
 * -- rgerhards, 2008-02-19
 */
static rsRetVal
ctokGetCharFromStream(ctok_t *pThis, uchar *pc)
{
	DEFiRet;
	uchar chCurr;

	ISOBJ_TYPE_assert(pThis, ctok);
	ASSERT(pc != NULL);

	chCurr = *pThis->pp;

	/* end of string or begin of comment terminates the "stream" */
	if(chCurr == '\0' || chCurr == '#') {
		ABORT_FINALIZE(RS_RET_EOS);
	}

	/* regular character: hand it out and move on */
	*pc = chCurr;
	pThis->pp++;

finalize_it:
	RETiRet;
}


/* Advance the input "stream" past any leading whitespace, so that the
 * next read delivers the first non-whitespace character. If the stream
 * ends before such a character is found, the error reported by
 * ctokGetCharFromStream() is handed to the caller.
 * rgerhards, 2008-02-19
 */
static rsRetVal
ctokSkipWhitespaceFromStream(ctok_t *pThis)
{
	DEFiRet;
	uchar chCurr;

	ISOBJ_TYPE_assert(pThis, ctok);

	/* consume everything up to and including the first non-whitespace */
	do {
		CHKiRet(ctokGetCharFromStream(pThis, &chCurr));
	} while(isspace(chCurr));

	/* that last character was read too much, so give it back */
	CHKiRet(ctokUngetCharFromStream(pThis, chCurr));

	dbgprintf("skipped whitepsace, stream now '%s'\n", pThis->pp);

finalize_it:
	RETiRet;
}


/* get the next word from the input "stream" (currently just a in-memory
 * string...). Leading whitespace is skipped first. A word is anything from
 * the then-current location until the first non-alphanumeric character.
 * If the word is longer than the provided memory buffer, parsing terminates
 * when buffer length has been reached. A buffer of 128 bytes or more should
 * always be by far sufficient.
 *
 * pWordBuf   receives the word as a NUL-terminated string
 * lenWordBuf size of pWordBuf in bytes, including room for the NUL (> 0)
 *
 * The buffer is NUL-terminated on ALL exit paths. This matters when the
 * stream ends in the middle of a word: the read error is still returned
 * unchanged, but the caller is never left with an unterminated buffer.
 * -- rgerhards, 2008-02-19
 */
static rsRetVal 
ctokGetWordFromStream(ctok_t *pThis, uchar *pWordBuf, size_t lenWordBuf)
{
	DEFiRet;
	uchar c;

	ISOBJ_TYPE_assert(pThis, ctok);
	ASSERT(pWordBuf != NULL);
	ASSERT(lenWordBuf > 0);

	CHKiRet(ctokSkipWhitespaceFromStream(pThis));

	CHKiRet(ctokGetCharFromStream(pThis, &c));
	/* "> 1" keeps one byte in reserve for the terminating NUL */
	while(isalnum(c) && lenWordBuf > 1) {
		*pWordBuf++ = c;
		--lenWordBuf;
		CHKiRet(ctokGetCharFromStream(pThis, &c));
	}

	/* push back the char that we have read too much */
	CHKiRet(ctokUngetCharFromStream(pThis, c));

	dbgprintf("end ctokGetWordFromStream, stream now '%s'\n", pThis->pp);

finalize_it:
	/* pWordBuf points right behind the last char stored (or to the start of
	 * the buffer if none was stored); there is always space for the NUL - see
	 * while() above. Doing this here also covers the error paths.
	 */
	*pWordBuf = '\0';
	RETiRet;
}


/* read in a constant number
 * This is the "number" ABNF element. The base is derived from the prefix:
 * a leading "0x" selects hexadecimal, any other leading "0" selects octal and
 * everything else is decimal. On success, the token type is ctok_NUMBER and
 * the value is stored in pToken->intVal.
 *
 * A digit that is not valid for the selected base causes
 * RS_RET_INVALID_OCTAL_DIGIT (octal) or RS_RET_INVALID_HEX_DIGIT (all other
 * bases) to be returned. Errors from reading the stream are handed to the
 * caller as they are.
 *
 * The character that terminates the number is NOT part of it and is pushed
 * back to the stream, so that the next token starts with it (think of
 * input like "5)" or "5<6").
 * rgerhards, 2008-02-19
 */
static rsRetVal
ctokGetNumber(ctok_t *pThis, ctok_token_t *pToken)
{
	DEFiRet;
	int64 n; /* the parsed number */
	uchar c;
	int valC;
	int iBase;

	ISOBJ_TYPE_assert(pThis, ctok);
	ASSERT(pToken != NULL);

	pToken->tok = ctok_NUMBER;

	CHKiRet(ctokGetCharFromStream(pThis, &c));
	if(c == '0') { /* octal? */
		CHKiRet(ctokGetCharFromStream(pThis, &c));
		if(c == 'x') { /* nope, hex! */
			CHKiRet(ctokGetCharFromStream(pThis, &c));
			c = tolower(c);
			iBase = 16;
		} else {
			iBase = 8;
		}
	} else {
		iBase = 10;
	}
		
	n = 0;
	/* consume digits (0-9, a-f) until the first char that is none of them */
	while(isdigit(c) || (c >= 'a' && c <= 'f')) {
		if(isdigit(c)) {
			valC = c - '0';
		} else {
			valC = c - 'a' + 10;
		}
		
		if(valC >= iBase) {
			if(iBase == 8) {
				ABORT_FINALIZE(RS_RET_INVALID_OCTAL_DIGIT);
			} else {
				ABORT_FINALIZE(RS_RET_INVALID_HEX_DIGIT);
			}
		}
		/* we now have the next value and know it is right */
		n = n * iBase + valC;
		CHKiRet(ctokGetCharFromStream(pThis, &c));
		c = tolower(c); /* so that upper case hex digits are accepted, too */
	}
	pToken->intVal = n;

	/* the char that ended the loop belongs to the next token - put it back */
	CHKiRet(ctokUngetCharFromStream(pThis, c));

dbgprintf("number, number is: '%lld'\n", pToken->intVal);

finalize_it:
	RETiRet;
}


/* read in a variable
 * This covers both msgvar and sysvar from the ABNF. The caller has already
 * consumed the leading '$'. If the next character is a second '$', the
 * token is a system variable (ctok_SYSVAR), otherwise it is a message
 * variable (ctok_MSGVAR). The variable name is everything up to (but not
 * including) the next whitespace character; it is stored lower-cased in
 * pToken->pstrVal. The terminating whitespace character is consumed.
 *
 * If anything fails, a string object already attached to the token is
 * destructed again and pToken->pstrVal is reset to NULL.
 * rgerhards, 2008-02-19
 */
static rsRetVal
ctokGetVar(ctok_t *pThis, ctok_token_t *pToken)
{
	DEFiRet;
	uchar c;

	ISOBJ_TYPE_assert(pThis, ctok);
	ASSERT(pToken != NULL);

	CHKiRet(ctokGetCharFromStream(pThis, &c));

	if(c == '$') { /* second dollar, we have a system variable */
		pToken->tok = ctok_SYSVAR;
		CHKiRet(ctokGetCharFromStream(pThis, &c)); /* "eat" it... */
	} else {
		pToken->tok = ctok_MSGVAR;
	}

	CHKiRet(rsCStrConstruct(&pToken->pstrVal));
	/* this loop is quite simple, a variable name is terminated by whitespace. */
	while(!isspace(c)) {
		CHKiRet(rsCStrAppendChar(pToken->pstrVal, tolower(c)));
		CHKiRet(ctokGetCharFromStream(pThis, &c));
	}
	/* finish the string we have just built (this is the token's own string
	 * object, there is no other string in scope here)
	 */
	CHKiRet(rsCStrFinish(pToken->pstrVal));

dbgprintf("var, var is: '%s'\n", rsCStrGetSzStr(pToken->pstrVal));

finalize_it:
	if(iRet != RS_RET_OK) {
		/* do not leave a half-built string behind */
		if(pToken->pstrVal != NULL) {
			rsCStrDestruct(pToken->pstrVal);
			pToken->pstrVal = NULL;
		}
	}

	RETiRet;
}


/* read in a simple string (simpstr in ABNF)
 * The caller has already consumed the opening single quote. Characters are
 * collected into pToken->pstrVal until the closing (unescaped) single quote,
 * which is consumed but not stored. A backslash escapes the character that
 * follows it: the backslash itself is dropped and the next character is
 * taken literally, whatever it is (so \' and \\ yield ' and \).
 * On success, the token type is ctok_SIMPSTR.
 *
 * If anything fails (including the stream ending before the closing quote),
 * a string object already attached to the token is destructed again and
 * pToken->pstrVal is reset to NULL.
 * rgerhards, 2008-02-19
 */
static rsRetVal
ctokGetSimpStr(ctok_t *pThis, ctok_token_t *pToken)
{
	DEFiRet;
	uchar c;
	int bInEsc = 0;

	ISOBJ_TYPE_assert(pThis, ctok);
	ASSERT(pToken != NULL);

	pToken->tok = ctok_SIMPSTR;

	CHKiRet(rsCStrConstruct(&pToken->pstrVal));
	CHKiRet(ctokGetCharFromStream(pThis, &c));
	/* while we are in escape mode (had a backslash), no sequence
	 * terminates the loop. If outside, it is terminated by a single quote.
	 */
	while(bInEsc || c != '\'') {
		if(bInEsc) {
			CHKiRet(rsCStrAppendChar(pToken->pstrVal, c));
			bInEsc = 0;
		} else {
			if(c == '\\') {
				bInEsc = 1;
			} else {
				CHKiRet(rsCStrAppendChar(pToken->pstrVal, c));
			}
		}
		CHKiRet(ctokGetCharFromStream(pThis, &c));
	}
	/* finish the string we have just built (this is the token's own string
	 * object, there is no other string in scope here)
	 */
	CHKiRet(rsCStrFinish(pToken->pstrVal));

dbgprintf("simpstr, str is: '%s'\n", rsCStrGetSzStr(pToken->pstrVal));

finalize_it:
	if(iRet != RS_RET_OK) {
		/* do not leave a half-built string behind */
		if(pToken->pstrVal != NULL) {
			rsCStrDestruct(pToken->pstrVal);
			pToken->pstrVal = NULL;
		}
	}

	RETiRet;
}


/* Unget a token. The token ungotten will be returned the next time
 * ctokGetToken() is called. Only one token can be ungotten at a time.
 * If a second token is ungotten, the first is lost. This is considered
 * a programming error (and is caught by the ASSERT below in builds where
 * ASSERT is active).
 * Only the pointer is stored; the token is not copied.
 * rgerhards, 2008-02-20
 */
rsRetVal
ctokUngetToken(ctok_t *pThis, ctok_token_t *pToken)
{
	DEFiRet;

	ISOBJ_TYPE_assert(pThis, ctok);
	ASSERT(pToken != NULL);
	ASSERT(pThis->pUngotToken == NULL); /* slot must be free, see above */

	pThis->pUngotToken = pToken;

	RETiRet;
}


/* Skip over an inline comment (just like a C comment). The caller has
 * already consumed the comment opener; we read on until the closing
 * asterisk-slash pair has been consumed. If the stream ends before that,
 * the error reported by ctokGetCharFromStream() is handed to the caller.
 * rgerhards, 2008-02-20
 */
static rsRetVal
ctokSkipInlineComment(ctok_t *pThis)
{
	DEFiRet;
	uchar chCurr;
	int bPrevWasAsterisk = 0; /* was the previously read char a '*'? */

	ISOBJ_TYPE_assert(pThis, ctok);

	for(;;) {
		CHKiRet(ctokGetCharFromStream(pThis, &chCurr));
		if(bPrevWasAsterisk && chCurr == '/') {
			break; /* found the end of the comment */
		}
		bPrevWasAsterisk = (chCurr == '*');
	}

finalize_it:
	RETiRet;
}


/* Get the *next* token from the input stream. This parses the next token and
 * ignores any whitespace and inline comments in between. End of stream (as
 * well as any other problem) is communicated via iRet.
 *
 * If a token has been handed back via ctokUngetToken(), that very token is
 * returned without any parsing. Otherwise a new token object is created and
 * filled. On success, *ppToken points to the token. The returned token must
 * either be destructed by the caller OR being passed back to
 * ctokUngetToken(). On failure, *ppToken is not modified and a token
 * created here is destructed again.
 *
 * For operators that may be one or two characters long ("<" vs. "<=" / "<>",
 * ">" vs. ">=", "/" vs. comment start), the lookahead character is put back
 * if it turns out not to be part of the operator, so that it becomes the
 * begin of the next token.
 * rgerhards, 2008-02-19
 */
rsRetVal
ctokGetToken(ctok_t *pThis, ctok_token_t **ppToken)
{
	DEFiRet;
	ctok_token_t *pToken = NULL; /* must be NULL for the error handler in finalize_it */
	uchar c;
	uchar szWord[128];
	int bRetry = 0; /* retry parse? Only needed for inline comments... */

	ISOBJ_TYPE_assert(pThis, ctok);
	ASSERT(ppToken != NULL);

	/* first check if we have an ungotten token and, if so, provide that
	 * one back (without any parsing). -- rgerhards, 2008-02-20
	 */
	if(pThis->pUngotToken != NULL) {
		*ppToken = pThis->pUngotToken;
		pThis->pUngotToken = NULL;
		FINALIZE;
	}

	/* setup the stage - create our token */
	CHKiRet(ctok_tokenConstruct(&pToken));
	CHKiRet(ctok_tokenConstructFinalize(pToken));

	/* find the next token. We may loop when we have inline comments */
	do {
		bRetry = 0;
		CHKiRet(ctokSkipWhitespaceFromStream(pThis));
		CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a character */
		switch(c) {
			case '=': /* == */
				CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a character */
				pToken->tok = (c == '=')? ctok_CMP_EQ : ctok_INVALID;
				break;
			case '!': /* != */
				CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a character */
				pToken->tok = (c == '=')? ctok_CMP_NEQ : ctok_INVALID;
				break;
			case '<': /* <, <=, <> */
				CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a character */
				if(c == '=') {
					pToken->tok = ctok_CMP_LTEQ;
				} else if(c == '>') {
					pToken->tok = ctok_CMP_NEQ;
				} else {
					/* single-char operator, c belongs to the next token */
					CHKiRet(ctokUngetCharFromStream(pThis, c));
					pToken->tok = ctok_CMP_LT;
				}
				break;
			case '>': /* >, >= */
				CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a character */
				if(c == '=') {
					pToken->tok = ctok_CMP_GTEQ;
				} else {
					/* single-char operator, c belongs to the next token */
					CHKiRet(ctokUngetCharFromStream(pThis, c));
					pToken->tok = ctok_CMP_GT;
				}
				break;
			case '+':
				pToken->tok = ctok_PLUS;
				break;
			case '-':
				pToken->tok = ctok_MINUS;
				break;
			case '*':
				pToken->tok = ctok_TIMES;
				break;
			case '/': /* /, /.* ... *./ (comments, mungled here for obvious reasons...) */
				CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a character */
				if(c == '*') {
					/* we have a comment and need to skip it; there is no
					 * token yet, so we need to go for another round.
					 */
					CHKiRet(ctokSkipInlineComment(pThis));
					bRetry = 1;
				} else {
					CHKiRet(ctokUngetCharFromStream(pThis, c)); /* put back, not processed */
					pToken->tok = ctok_DIV;
				}
				break;
			case '%':
				pToken->tok = ctok_MOD;
				break;
			case '(':
				pToken->tok = ctok_LPAREN;
				break;
			case ')':
				pToken->tok = ctok_RPAREN;
				break;
			case ',':
				pToken->tok = ctok_COMMA;
				break;
			case '$':
				CHKiRet(ctokGetVar(pThis, pToken));
				break;
			case '\'': /* simple string, this is somewhat more elaborate */
				CHKiRet(ctokGetSimpStr(pThis, pToken));
				break;
			case '"':
				/* TODO: template string parser */
				ABORT_FINALIZE(RS_RET_NOT_IMPLEMENTED);
				break;
			default:
				CHKiRet(ctokUngetCharFromStream(pThis, c)); /* push back, we need it in any case */
				if(isdigit(c)) {
					CHKiRet(ctokGetNumber(pThis, pToken));
				} else { /* now we check if we have a multi-char sequence */
					CHKiRet(ctokGetWordFromStream(pThis, szWord, sizeof(szWord)/sizeof(uchar)));
					if(!strcasecmp((char*)szWord, "and")) {
						pToken->tok = ctok_AND;
					} else if(!strcasecmp((char*)szWord, "or")) {
						pToken->tok = ctok_OR;
					} else if(!strcasecmp((char*)szWord, "not")) {
						pToken->tok = ctok_NOT;
					} else if(!strcasecmp((char*)szWord, "then")) {
						pToken->tok = ctok_THEN;
					} else {
						/* finally, we check if it is a function */
						CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a character */
						if(c == '(') {
							/* push c back, higher level parser needs it */
							CHKiRet(ctokUngetCharFromStream(pThis, c));
							pToken->tok = ctok_FUNCTION;
							/* TODO: fill function name */
						} else { /* give up... */
							pToken->tok = ctok_INVALID;
						}
					}
				}
				break;
		}
	} while(bRetry); /* warning: do ... while()! */

	*ppToken = pToken;
RUNLOG_VAR("%d", pToken->tok);

finalize_it:
	if(iRet != RS_RET_OK) {
		/* pToken is NULL if we failed before (or while) creating it */
		if(pToken != NULL)
			ctok_tokenDestruct(&pToken);
	}

	RETiRet;
}


/* property set methods */
/* simple ones first */
/* NOTE(review): DEFpropSetMeth is a project macro whose expansion is not
 * visible here; presumably it generates the setter for the "pp" member
 * (the current parse position, of type uchar*) - confirm against its
 * definition.
 */
DEFpropSetMeth(ctok, pp, uchar*)

/* return the current position of pp - most important as currently we do only
 * partial parsing, so the rest must know where to start from...
 * The position is stored to *pp; pp must not be NULL. The returned pointer
 * is the tokenizer's own read position, nothing is copied.
 * rgerhards, 2008-02-19
 */
rsRetVal
ctokGetpp(ctok_t *pThis, uchar **pp)
{
	DEFiRet;

	/* same object sanity check as in all other methods that use pThis */
	ISOBJ_TYPE_assert(pThis, ctok);
	ASSERT(pp != NULL);

	*pp = pThis->pp;

	RETiRet;
}

/* Class initializer for ctok (object version 1). It registers
 * ctokConstructFinalize() as the handler for the construction finalizer
 * method. No other method handlers are set here.
 */
BEGINObjClassInit(ctok, 1) /* class, version */
	OBJSetMethodHandler(objMethod_CONSTRUCTION_FINALIZER, ctokConstructFinalize);
ENDObjClassInit(ctok)

/* vi:set ai:
 */