From 1b7f5c54684db29c096e09238648a45dce78ebee Mon Sep 17 00:00:00 2001
From: Rainer Gerhards
Date: Wed, 4 Nov 2009 10:40:27 +0100
Subject: moved rfc3164/5424 code to new parser modules

another milestone commit: the program works, the new interface is used,
some more cleanup is needed and the per-ruleset config options are still
missing. But we are getting closer...
---
 tools/pmrfc5424.c | 228 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 228 insertions(+)

(limited to 'tools/pmrfc5424.c')

diff --git a/tools/pmrfc5424.c b/tools/pmrfc5424.c
index acc21817..2fa2c981 100644
--- a/tools/pmrfc5424.c
+++ b/tools/pmrfc5424.c
@@ -27,6 +27,7 @@
  */
 #include "config.h"
 #include "rsyslog.h"
+#include
 #include
 #include
 #include
@@ -39,6 +40,7 @@
 #include "glbl.h"
 #include "errmsg.h"
 #include "parser.h"
+#include "datetime.h"
 #include "unicode-helper.h"
 
 MODULE_TYPE_PARSER
@@ -50,6 +52,7 @@ DEF_PMOD_STATIC_DATA
 DEFobjCurrIf(errmsg)
 DEFobjCurrIf(glbl)
 DEFobjCurrIf(parser)
+DEFobjCurrIf(datetime)
 
 /* config data */
 
@@ -60,8 +63,231 @@ CODESTARTisCompatibleWithFeature
 ENDisCompatibleWithFeature
 
 
+/* Helper to parseRFCSyslogMsg: extracts one SP-terminated header field.
+ * pp2parse: in/out parse pointer; on exit it points after the field and
+ *           after its terminating SP (if there was one).
+ * pResult:  caller-provided buffer that receives the field NUL-terminated;
+ *           it is not bounds-checked and must hold *pLenStr + 1 bytes.
+ * pLenStr:  in/out count of bytes left at *pp2parse; no more than that
+ *           is processed and the counter is updated on exit.
+ * Returns 0 if the field was terminated by SP and 1 if the input ended
+ * before an SP was seen (pResult then holds what was read so far).
+ * -- rger, 2005-11-24; length-bounded since rgerhards, 2009-09-23
+ */
+static int parseRFCField(uchar **pp2parse, uchar *pResult, int *pLenStr)
+{
+	uchar *p2parse;
+	int iRet = 0;
+
+	assert(pp2parse != NULL);
+	assert(*pp2parse != NULL);
+	assert(pResult != NULL);
+
+	p2parse = *pp2parse;
+
+	/* this is the actual parsing loop */
+	while(*pLenStr > 0 && *p2parse != ' ') {
+		*pResult++ = *p2parse++;
+		--(*pLenStr);
+	}
+
+	if(*pLenStr > 0 && *p2parse == ' ') {
+		++p2parse; /* eat SP, but only if not at end of string */
+		--(*pLenStr);
+	} else {
+		iRet = 1; /* there MUST be an SP! */
+	}
+	*pResult = '\0';
+
+	/* set the new parse pointer */
+	*pp2parse = p2parse;
+	return iRet; /* report a missing SP to the caller */
+}
+
+
+/* Helper to parseRFCSyslogMsg: extracts the STRUCTURED-DATA field as a
+ * whole, either the NILVALUE "-" or everything from the first [ up to the
+ * first ] that is followed by SP or by the end of the input. It does NOT
+ * parse inside structured data, that is left to other functions.
+ * pp2parse: in/out parse pointer, advanced past the field and its SP.
+ * pResult:  caller-provided buffer receiving the field NUL-terminated; it
+ *           is not bounds-checked and must hold *pLenStr + 1 bytes.
+ * pLenStr:  in/out count of bytes left at *pp2parse, updated on exit.
+ * Returns 0 on success; 1 if this is no structured data (nothing is
+ * consumed, pResult is set to "") or if the field is malformed.
+ * -- rger, 2005-11-24; length-bounded since rgerhards, 2009-09-23
+ */
+static int parseRFCStructuredData(uchar **pp2parse, uchar *pResult, int *pLenStr)
+{
+	uchar *p2parse;
+	int bCont = 1;
+	int iRet = 0;
+	int lenStr;
+
+	assert(pp2parse != NULL);
+	assert(*pp2parse != NULL);
+	assert(pResult != NULL);
+
+	p2parse = *pp2parse;
+	lenStr = *pLenStr;
+
+	/* Remember: structured data starts with [ and includes any characters
+	 * until the first ] followed by a SP. There may be spaces inside
+	 * structured data. There may also be \] inside the structured data, which
+	 * do NOT terminate an element. A sole - means "no structured data".
+	 */
+	*pResult = '\0'; /* so that the early return never leaves stale data */
+	if(lenStr == 0 || (*p2parse != '[' && *p2parse != '-'))
+		return 1; /* this is NOT structured data! */
+
+	if(*p2parse == '-') { /* empty structured data? */
+		*pResult++ = '-';
+		++p2parse;
+		--lenStr;
+	} else {
+		while(bCont) {
+			if(lenStr < 2) {
+				/* we now need to check if we have only structured data */
+				if(lenStr > 0 && *p2parse == ']') {
+					*pResult++ = *p2parse;
+					p2parse++;
+					lenStr--;
+					bCont = 0;
+				} else {
+					iRet = 1; /* this is not valid! */
+					bCont = 0;
+				}
+			} else if(*p2parse == '\\' && *(p2parse+1) == ']') {
+				/* this is escaped, need to copy both */
+				*pResult++ = *p2parse++;
+				*pResult++ = *p2parse++;
+				lenStr -= 2;
+			} else if(*p2parse == ']' && *(p2parse+1) == ' ') {
+				/* found end: copy the ], the SP is eaten by the check below */
+				*pResult++ = *p2parse;
+				++p2parse;
+				--lenStr;
+				bCont = 0;
+			} else {
+				*pResult++ = *p2parse++;
+				--lenStr;
+			}
+		}
+	}
+
+	if(lenStr > 0 && *p2parse == ' ') {
+		++p2parse; /* eat SP, but only if not at end of string */
+		--lenStr;
+	} else if(lenStr > 0) {
+		iRet = 1; /* more data follows, so there MUST be an SP! */
+	}
+	*pResult = '\0';
+
+	/* set the new parse pointer */
+	*pp2parse = p2parse;
+	*pLenStr = lenStr;
+	return iRet; /* report malformed structured data to the caller */
+}
+
+/* parse a RFC5424-formatted syslog message (the name "parse *RFC*" stems
+ * from the time when syslog-protocol was only hoped to become an RFC). Do
+ * not confuse this with informational RFC 3164 (which is legacy syslog).
+ *
+ * currently supported format:
+ *
+ * <PRI>VERSION SP TIMESTAMP SP HOSTNAME SP APP-NAME SP PROCID SP MSGID SP [SD-ID]s SP MSG
+ *
+ * <PRI> is skipped via pMsg->offAfterPRI. VERSION has NOT been stripped; if
+ * the text after <PRI> does not begin with "1 " we are not the right parser
+ * and abort with RS_RET_COULD_NOT_PARSE. The header fields are stored into
+ * pMsg via the MsgSet*() calls. If no valid TIMESTAMP is found, the other
+ * header fields are skipped and MSG starts at the current parse position.
+ * The results of the field helpers are not checked, so a malformed header
+ * does not cause the message to be rejected once VERSION has matched.
+ *
+ * rger, 2005-11-24
+ */
 BEGINparse
+	uchar *p2parse;
+	uchar *pBuf = NULL;
+	int lenMsg;
+	int bContParse = 1;
 CODESTARTparse
+	assert(pMsg != NULL);
+	assert(pMsg->pszRawMsg != NULL);
+	p2parse = pMsg->pszRawMsg + pMsg->offAfterPRI; /* point to start of text, after PRI */
+	lenMsg = pMsg->iLenRawMsg - pMsg->offAfterPRI;
+
+	/* check if we are the right parser */
+	if(lenMsg < 2 || p2parse[0] != '1' || p2parse[1] != ' ') {
+		ABORT_FINALIZE(RS_RET_COULD_NOT_PARSE);
+	}
+	DBGPRINTF("Message has RFC5424/syslog-protocol format.\n");
+	setProtocolVersion(pMsg, 1);
+	p2parse += 2;
+	lenMsg -= 2;
+
+	/* Now get us some memory we can use as a work buffer while parsing.
+	 * We simply allocate a buffer sufficiently large to hold all of the
+	 * message, so we can not run into any troubles. I think this is
+	 * wiser than to use individual buffers.
+	 */
+	CHKmalloc(pBuf = MALLOC(sizeof(uchar) * (lenMsg + 1)));
+
+	/* IMPORTANT NOTE:
+	 * Validation is not actually done below nor are any errors handled. I have
+	 * NOT included this for the current proof of concept. However, it is strongly
+	 * advisable to add it when this code actually goes into production.
+	 * rgerhards, 2005-11-24
+	 */
+
+	/* TIMESTAMP */
+	if(datetime.ParseTIMESTAMP3339(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) {
+		if(pMsg->msgFlags & IGNDATE) {
+			/* we need to ignore the msg data, so simply copy over reception date */
+			memcpy(&pMsg->tTIMESTAMP, &pMsg->tRcvdAt, sizeof(struct syslogTime));
+		}
+	} else {
+		DBGPRINTF("no TIMESTAMP detected!\n");
+		bContParse = 0;
+	}
+
+	/* HOSTNAME */
+	if(bContParse) {
+		parseRFCField(&p2parse, pBuf, &lenMsg);
+		MsgSetHOSTNAME(pMsg, pBuf, ustrlen(pBuf));
+	}
+
+	/* APP-NAME */
+	if(bContParse) {
+		parseRFCField(&p2parse, pBuf, &lenMsg);
+		MsgSetAPPNAME(pMsg, (char*)pBuf);
+	}
+
+	/* PROCID */
+	if(bContParse) {
+		parseRFCField(&p2parse, pBuf, &lenMsg);
+		MsgSetPROCID(pMsg, (char*)pBuf);
+	}
+
+	/* MSGID */
+	if(bContParse) {
+		parseRFCField(&p2parse, pBuf, &lenMsg);
+		MsgSetMSGID(pMsg, (char*)pBuf);
+	}
+
+	/* STRUCTURED-DATA */
+	if(bContParse) {
+		parseRFCStructuredData(&p2parse, pBuf, &lenMsg);
+		MsgSetStructuredData(pMsg, (char*)pBuf);
+	}
+
+	/* MSG */
+	MsgSetMSGoffs(pMsg, p2parse - pMsg->pszRawMsg);
+
+finalize_it:
+	/* also reached on early abort, when pBuf is still NULL (free is a no-op then) */
+	free(pBuf);
 ENDparse
 
 
@@ -71,6 +297,7 @@ CODESTARTmodExit
 	objRelease(errmsg, CORE_COMPONENT);
 	objRelease(glbl, CORE_COMPONENT);
 	objRelease(parser, CORE_COMPONENT);
+	objRelease(datetime, CORE_COMPONENT);
 ENDmodExit
 
 
@@ -88,6 +315,7 @@ CODEmodInit_QueryRegCFSLineHdlr
 	CHKiRet(objUse(glbl, CORE_COMPONENT));
 	CHKiRet(objUse(errmsg, CORE_COMPONENT));
 	CHKiRet(objUse(parser, CORE_COMPONENT));
+	CHKiRet(objUse(datetime, CORE_COMPONENT));
 
 	dbgprintf("rfc5424 parser init called\n");
 	dbgprintf("GetParserName addr %p\n", GetParserName);
-- 
cgit