diff options
author | Rainer Gerhards <rgerhards@adiscon.com> | 2009-11-03 10:41:47 +0100 |
---|---|---|
committer | Rainer Gerhards <rgerhards@adiscon.com> | 2009-11-03 10:41:47 +0100 |
commit | 7d78b3bdfd357dd921797ce983eb96532c56a7f6 (patch) | |
tree | 358f820bbfe724733638320ed3c467fca78a7498 | |
parent | f291d5c570052f163004cbf31f24bb0884dc6681 (diff) | |
download | rsyslog-7d78b3bdfd357dd921797ce983eb96532c56a7f6.tar.gz rsyslog-7d78b3bdfd357dd921797ce983eb96532c56a7f6.tar.xz rsyslog-7d78b3bdfd357dd921797ce983eb96532c56a7f6.zip |
restructured parser part of rsyslog
now cleaner and hopefully usuable as a basis for loadable parser
modules. I also cleaned up/consolidated some of the internal
message generation functionality in rsyslogd.
-rw-r--r-- | dirty.h | 3 | ||||
-rw-r--r-- | runtime/parser.c | 382 | ||||
-rw-r--r-- | tools/iminternal.c | 7 | ||||
-rw-r--r-- | tools/iminternal.h | 5 | ||||
-rw-r--r-- | tools/syslogd.c | 447 |
5 files changed, 394 insertions, 450 deletions
@@ -31,8 +31,6 @@ rsRetVal multiSubmitMsg(multi_submit_t *pMultiSub); rsRetVal submitMsg(msg_t *pMsg); rsRetVal logmsgInternal(int iErr, int pri, uchar *msg, int flags); rsRetVal parseAndSubmitMessage(uchar *hname, uchar *hnameIP, uchar *msg, int len, int flags, flowControl_t flowCtlTypeu, prop_t *pInputName, struct syslogTime *stTime, time_t ttGenTime); -int parseRFCSyslogMsg(msg_t *pMsg, int flags); -int parseLegacySyslogMsg(msg_t *pMsg, int flags); rsRetVal diagGetMainMsgQSize(int *piSize); /* for imdiag */ char* getFIOPName(unsigned iFIOP); rsRetVal createMainQueue(qqueue_t **ppQueue, uchar *pszQueueName); @@ -48,6 +46,7 @@ extern int MarkInterval; extern int repeatinterval[2]; extern int bReduceRepeatMsgs; extern qqueue_t *pMsgQueue; /* the main message queue */ +extern int bParseHOSTNAMEandTAG; /* global config var: should the hostname and tag be */ #define MAXREPEAT ((int)((sizeof(repeatinterval) / sizeof(repeatinterval[0])) - 1)) #define REPEATTIME(f) ((f)->f_time + repeatinterval[(f)->f_repeatcount]) #define BACKOFF(f) { if (++(f)->f_repeatcount > MAXREPEAT) \ diff --git a/runtime/parser.c b/runtime/parser.c index e29f3b0b..89e59f87 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -37,7 +37,10 @@ #include "dirty.h" #include "msg.h" #include "obj.h" +#include "datetime.h" #include "errmsg.h" +#include "unicode-helper.h" +#include "dirty.h" /* some defines */ #define DEFUPRI (LOG_USER|LOG_NOTICE) @@ -46,6 +49,7 @@ DEFobjStaticHelpers DEFobjCurrIf(glbl) DEFobjCurrIf(errmsg) +DEFobjCurrIf(datetime) /* static data */ @@ -60,11 +64,389 @@ rsRetVal parserClassInit(void) CHKiRet(objGetObjInterface(&obj)); /* this provides the root pointer for all other queries */ CHKiRet(objUse(glbl, CORE_COMPONENT)); CHKiRet(objUse(errmsg, CORE_COMPONENT)); + CHKiRet(objUse(datetime, CORE_COMPONENT)); // TODO: free components! see action.c finalize_it: RETiRet; } +/***************************RFC 5425 PARSER ******************************************************/ + + +/* Helper to parseRFCSyslogMsg. This function parses a field up to + * (and including) the SP character after it. The field contents is + * returned in a caller-provided buffer. The parsepointer is advanced + * to after the terminating SP. The caller must ensure that the + * provided buffer is large enough to hold the to be extracted value. + * Returns 0 if everything is fine or 1 if either the field is not + * SP-terminated or any other error occurs. -- rger, 2005-11-24 + * The function now receives the size of the string and makes sure + * that it does not process more than that. The *pLenStr counter is + * updated on exit. -- rgerhards, 2009-09-23 + */ +static int parseRFCField(uchar **pp2parse, uchar *pResult, int *pLenStr) +{ + uchar *p2parse; + int iRet = 0; + + assert(pp2parse != NULL); + assert(*pp2parse != NULL); + assert(pResult != NULL); + + p2parse = *pp2parse; + + /* this is the actual parsing loop */ + while(*pLenStr > 0 && *p2parse != ' ') { + *pResult++ = *p2parse++; + --(*pLenStr); + } + + if(*pLenStr > 0 && *p2parse == ' ') { + ++p2parse; /* eat SP, but only if not at end of string */ + --(*pLenStr); + } else { + iRet = 1; /* there MUST be an SP! */ + } + *pResult = '\0'; + + /* set the new parse pointer */ + *pp2parse = p2parse; + return 0; +} + + +/* Helper to parseRFCSyslogMsg. This function parses the structured + * data field of a message. It does NOT parse inside structured data, + * just gets the field as whole. Parsing the single entities is left + * to other functions. The parsepointer is advanced + * to after the terminating SP. The caller must ensure that the + * provided buffer is large enough to hold the to be extracted value. + * Returns 0 if everything is fine or 1 if either the field is not + * SP-terminated or any other error occurs. -- rger, 2005-11-24 + * The function now receives the size of the string and makes sure + * that it does not process more than that. The *pLenStr counter is + * updated on exit. -- rgerhards, 2009-09-23 + */ +static int parseRFCStructuredData(uchar **pp2parse, uchar *pResult, int *pLenStr) +{ + uchar *p2parse; + int bCont = 1; + int iRet = 0; + int lenStr; + + assert(pp2parse != NULL); + assert(*pp2parse != NULL); + assert(pResult != NULL); + + p2parse = *pp2parse; + lenStr = *pLenStr; + + /* this is the actual parsing loop + * Remeber: structured data starts with [ and includes any characters + * until the first ] followed by a SP. There may be spaces inside + * structured data. There may also be \] inside the structured data, which + * do NOT terminate an element. + */ + if(lenStr == 0 || *p2parse != '[') + return 1; /* this is NOT structured data! */ + + if(*p2parse == '-') { /* empty structured data? */ + *pResult++ = '-'; + ++p2parse; + --lenStr; + } else { + while(bCont) { + if(lenStr < 2) { + /* we now need to check if we have only structured data */ + if(lenStr > 0 && *p2parse == ']') { + *pResult++ = *p2parse; + p2parse++; + lenStr--; + bCont = 0; + } else { + iRet = 1; /* this is not valid! */ + bCont = 0; + } + } else if(*p2parse == '\\' && *(p2parse+1) == ']') { + /* this is escaped, need to copy both */ + *pResult++ = *p2parse++; + *pResult++ = *p2parse++; + lenStr -= 2; + } else if(*p2parse == ']' && *(p2parse+1) == ' ') { + /* found end, just need to copy the ] and eat the SP */ + *pResult++ = *p2parse; + p2parse += 2; + lenStr -= 2; + bCont = 0; + } else { + *pResult++ = *p2parse++; + --lenStr; + } + } + } + + if(lenStr > 0 && *p2parse == ' ') { + ++p2parse; /* eat SP, but only if not at end of string */ + --lenStr; + } else { + iRet = 1; /* there MUST be an SP! */ + } + *pResult = '\0'; + + /* set the new parse pointer */ + *pp2parse = p2parse; + *pLenStr = lenStr; + return 0; +} + +/* parse a RFC5424-formatted syslog message. This function returns + * 0 if processing of the message shall continue and 1 if something + * went wrong and this messe should be ignored. This function has been + * implemented in the effort to support syslog-protocol. Please note that + * the name (parse *RFC*) stems from the hope that syslog-protocol will + * some time become an RFC. Do not confuse this with informational + * RFC 3164 (which is legacy syslog). + * + * currently supported format: + * + * <PRI>VERSION SP TIMESTAMP SP HOSTNAME SP APP-NAME SP PROCID SP MSGID SP [SD-ID]s SP MSG + * + * <PRI> is already stripped when this function is entered. VERSION already + * has been confirmed to be "1", but has NOT been stripped from the message. + * + * rger, 2005-11-24 + */ +static int parseRFCSyslogMsg(msg_t *pMsg, int flags) +{ + uchar *p2parse; + uchar *pBuf; + int lenMsg; + int bContParse = 1; + + BEGINfunc + assert(pMsg != NULL); + assert(pMsg->pszRawMsg != NULL); + p2parse = pMsg->pszRawMsg + pMsg->offAfterPRI; /* point to start of text, after PRI */ + lenMsg = pMsg->iLenRawMsg - pMsg->offAfterPRI; + + /* do a sanity check on the version and eat it (the caller checked this already) */ + assert(p2parse[0] == '1' && p2parse[1] == ' '); + p2parse += 2; + lenMsg -= 2; + + /* Now get us some memory we can use as a work buffer while parsing. + * We simply allocated a buffer sufficiently large to hold all of the + * message, so we can not run into any troubles. I think this is + * more wise then to use individual buffers. + */ + if((pBuf = MALLOC(sizeof(uchar) * (lenMsg + 1))) == NULL) + return 1; + + /* IMPORTANT NOTE: + * Validation is not actually done below nor are any errors handled. I have + * NOT included this for the current proof of concept. However, it is strongly + * advisable to add it when this code actually goes into production. + * rgerhards, 2005-11-24 + */ + + /* TIMESTAMP */ + if(datetime.ParseTIMESTAMP3339(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) { + if(flags & IGNDATE) { + /* we need to ignore the msg data, so simply copy over reception date */ + memcpy(&pMsg->tTIMESTAMP, &pMsg->tRcvdAt, sizeof(struct syslogTime)); + } + } else { + DBGPRINTF("no TIMESTAMP detected!\n"); + bContParse = 0; + } + + /* HOSTNAME */ + if(bContParse) { + parseRFCField(&p2parse, pBuf, &lenMsg); + MsgSetHOSTNAME(pMsg, pBuf, ustrlen(pBuf)); + } + + /* APP-NAME */ + if(bContParse) { + parseRFCField(&p2parse, pBuf, &lenMsg); + MsgSetAPPNAME(pMsg, (char*)pBuf); + } + + /* PROCID */ + if(bContParse) { + parseRFCField(&p2parse, pBuf, &lenMsg); + MsgSetPROCID(pMsg, (char*)pBuf); + } + + /* MSGID */ + if(bContParse) { + parseRFCField(&p2parse, pBuf, &lenMsg); + MsgSetMSGID(pMsg, (char*)pBuf); + } + + /* STRUCTURED-DATA */ + if(bContParse) { + parseRFCStructuredData(&p2parse, pBuf, &lenMsg); + MsgSetStructuredData(pMsg, (char*)pBuf); + } + + /* MSG */ + MsgSetMSGoffs(pMsg, p2parse - pMsg->pszRawMsg); + + free(pBuf); + ENDfunc + return 0; /* all ok */ +} + + +/*********************** END RFC5425 PARSER ******************************************/ + +/***************************RFC 5425 PARSER ******************************************************/ + + +/* parse a legay-formatted syslog message. This function returns + * 0 if processing of the message shall continue and 1 if something + * went wrong and this messe should be ignored. This function has been + * implemented in the effort to support syslog-protocol. + * rger, 2005-11-24 + * As of 2006-01-10, I am removing the logic to continue parsing only + * when a valid TIMESTAMP is detected. Validity of other fields already + * is ignored. This is due to the fact that the parser has grown smarter + * and is now more able to understand different dialects of the syslog + * message format. I do not expect any bad side effects of this change, + * but I thought I log it in this comment. + * rgerhards, 2006-01-10 + */ +static int parseLegacySyslogMsg(msg_t *pMsg, int flags) +{ + uchar *p2parse; + int lenMsg; + int bTAGCharDetected; + int i; /* general index for parsing */ + uchar bufParseTAG[CONF_TAG_MAXSIZE]; + uchar bufParseHOSTNAME[CONF_TAG_HOSTNAME]; + BEGINfunc + + assert(pMsg != NULL); + assert(pMsg->pszRawMsg != NULL); + lenMsg = pMsg->iLenRawMsg - (pMsg->offAfterPRI + 1); + p2parse = pMsg->pszRawMsg + pMsg->offAfterPRI; /* point to start of text, after PRI */ + + /* Check to see if msg contains a timestamp. We start by assuming + * that the message timestamp is the time of reception (which we + * generated ourselfs and then try to actually find one inside the + * message. There we go from high-to low precison and are done + * when we find a matching one. -- rgerhards, 2008-09-16 + */ + if(datetime.ParseTIMESTAMP3339(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) { + /* we are done - parse pointer is moved by ParseTIMESTAMP3339 */; + } else if(datetime.ParseTIMESTAMP3164(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) { + /* we are done - parse pointer is moved by ParseTIMESTAMP3164 */; + } else if(*p2parse == ' ' && lenMsg > 1) { /* try to see if it is slighly malformed - HP procurve seems to do that sometimes */ + ++p2parse; /* move over space */ + --lenMsg; + if(datetime.ParseTIMESTAMP3164(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) { + /* indeed, we got it! */ + /* we are done - parse pointer is moved by ParseTIMESTAMP3164 */; + } else {/* parse pointer needs to be restored, as we moved it off-by-one + * for this try. + */ + --p2parse; + ++lenMsg; + } + } + + if(flags & IGNDATE) { + /* we need to ignore the msg data, so simply copy over reception date */ + memcpy(&pMsg->tTIMESTAMP, &pMsg->tRcvdAt, sizeof(struct syslogTime)); + } + + /* rgerhards, 2006-03-13: next, we parse the hostname and tag. But we + * do this only when the user has not forbidden this. I now introduce some + * code that allows a user to configure rsyslogd to treat the rest of the + * message as MSG part completely. In this case, the hostname will be the + * machine that we received the message from and the tag will be empty. This + * is meant to be an interim solution, but for now it is in the code. + */ + if(bParseHOSTNAMEandTAG && !(flags & INTERNAL_MSG)) { + /* parse HOSTNAME - but only if this is network-received! + * rger, 2005-11-14: we still have a problem with BSD messages. These messages + * do NOT include a host name. In most cases, this leads to the TAG to be treated + * as hostname and the first word of the message as the TAG. Clearly, this is not + * of advantage ;) I think I have now found a way to handle this situation: there + * are certain characters which are frequently used in TAG (e.g. ':'), which are + * *invalid* in host names. So while parsing the hostname, I check for these characters. + * If I find them, I set a simple flag but continue. After parsing, I check the flag. + * If it was set, then we most probably do not have a hostname but a TAG. Thus, I change + * the fields. I think this logic shall work with any type of syslog message. + * rgerhards, 2009-06-23: and I now have extended this logic to every character + * that is not a valid hostname. + */ + bTAGCharDetected = 0; + if(lenMsg > 0 && flags & PARSE_HOSTNAME) { + i = 0; + while(i < lenMsg && (isalnum(p2parse[i]) || p2parse[i] == '.' || p2parse[i] == '.' + || p2parse[i] == '_' || p2parse[i] == '-') && i < CONF_TAG_MAXSIZE) { + bufParseHOSTNAME[i] = p2parse[i]; + ++i; + } + + if(i > 0 && p2parse[i] == ' ' && isalnum(p2parse[i-1])) { + /* we got a hostname! */ + p2parse += i + 1; /* "eat" it (including SP delimiter) */ + lenMsg -= i + 1; + bufParseHOSTNAME[i] = '\0'; + MsgSetHOSTNAME(pMsg, bufParseHOSTNAME, i); + } + } + + /* now parse TAG - that should be present in message from all sources. + * This code is somewhat not compliant with RFC 3164. As of 3164, + * the TAG field is ended by any non-alphanumeric character. In + * practice, however, the TAG often contains dashes and other things, + * which would end the TAG. So it is not desirable. As such, we only + * accept colon and SP to be terminators. Even there is a slight difference: + * a colon is PART of the TAG, while a SP is NOT part of the tag + * (it is CONTENT). Starting 2008-04-04, we have removed the 32 character + * size limit (from RFC3164) on the tag. This had bad effects on existing + * envrionments, as sysklogd didn't obey it either (probably another bug + * in RFC3164...). We now receive the full size, but will modify the + * outputs so that only 32 characters max are used by default. + */ + i = 0; + while(lenMsg > 0 && *p2parse != ':' && *p2parse != ' ' && i < CONF_TAG_MAXSIZE) { + bufParseTAG[i++] = *p2parse++; + --lenMsg; + } + if(lenMsg > 0 && *p2parse == ':') { + ++p2parse; + --lenMsg; + bufParseTAG[i++] = ':'; + } + + /* no TAG can only be detected if the message immediatly ends, in which case an empty TAG + * is considered OK. So we do not need to check for empty TAG. -- rgerhards, 2009-06-23 + */ + bufParseTAG[i] = '\0'; /* terminate string */ + MsgSetTAG(pMsg, bufParseTAG, i); + } else {/* we enter this code area when the user has instructed rsyslog NOT + * to parse HOSTNAME and TAG - rgerhards, 2006-03-13 + */ + if(!(flags & INTERNAL_MSG)) { + DBGPRINTF("HOSTNAME and TAG not parsed by user configuraton.\n"); + } + } + + /* The rest is the actual MSG */ + MsgSetMSGoffs(pMsg, p2parse - pMsg->pszRawMsg); + + ENDfunc + return 0; /* all ok */ +} + + +/***************************END RFC 5425 PARSER ******************************************************/ + /* uncompress a received message if it is compressed. * pMsg->pszRawMsg buffer is updated. diff --git a/tools/iminternal.c b/tools/iminternal.c index 0ceff3d8..bd1fa128 100644 --- a/tools/iminternal.c +++ b/tools/iminternal.c @@ -89,7 +89,7 @@ finalize_it: * The interface of this function is modelled after syslogd/logmsg(), * for which it is an "replacement". */ -rsRetVal iminternalAddMsg(int pri, msg_t *pMsg, int flags) +rsRetVal iminternalAddMsg(int pri, msg_t *pMsg) { DEFiRet; iminternal_t *pThis; @@ -100,7 +100,6 @@ rsRetVal iminternalAddMsg(int pri, msg_t *pMsg, int flags) pThis->pri = pri; pThis->pMsg = pMsg; - pThis->flags = flags; CHKiRet(llAppend(&llMsgs, NULL, (void*) pThis)); @@ -119,7 +118,7 @@ finalize_it: * from the list and return it to the caller. The caller is * responsible for freeing the message! */ -rsRetVal iminternalRemoveMsg(int *pPri, msg_t **ppMsg, int *pFlags) +rsRetVal iminternalRemoveMsg(int *pPri, msg_t **ppMsg) { DEFiRet; iminternal_t *pThis; @@ -127,11 +126,9 @@ rsRetVal iminternalRemoveMsg(int *pPri, msg_t **ppMsg, int *pFlags) assert(pPri != NULL); assert(ppMsg != NULL); - assert(pFlags != NULL); CHKiRet(llGetNextElt(&llMsgs, &llCookie, (void*)&pThis)); *pPri = pThis->pri; - *pFlags = pThis->flags; *ppMsg = pThis->pMsg; pThis->pMsg = NULL; /* we do no longer own it - important for destructor */ diff --git a/tools/iminternal.h b/tools/iminternal.h index 8dc0f171..f1062a15 100644 --- a/tools/iminternal.h +++ b/tools/iminternal.h @@ -35,15 +35,14 @@ struct iminternal_s { /* config file sysline parse entry */ int pri; msg_t *pMsg; /* the message (in all its glory) */ - int flags; }; typedef struct iminternal_s iminternal_t; /* prototypes */ rsRetVal modInitIminternal(void); rsRetVal modExitIminternal(void); -rsRetVal iminternalAddMsg(int pri, msg_t *pMsg, int flags); +rsRetVal iminternalAddMsg(int pri, msg_t *pMsg); rsRetVal iminternalHaveMsgReady(int* pbHaveOne); -rsRetVal iminternalRemoveMsg(int *pPri, msg_t **ppMsg, int *pFlags); +rsRetVal iminternalRemoveMsg(int *pPri, msg_t **ppMsg); #endif /* #ifndef IMINTERNAL_H_INCLUDED */ diff --git a/tools/syslogd.c b/tools/syslogd.c index 41d819a6..998d6512 100644 --- a/tools/syslogd.c +++ b/tools/syslogd.c @@ -157,7 +157,6 @@ DEFobjCurrIf(net) /* TODO: make go away! */ /* forward definitions */ static rsRetVal GlobalClassExit(void); -static void logmsg(msg_t *pMsg, int flags); #ifndef _PATH_LOGCONF @@ -207,7 +206,7 @@ static pid_t myPid; /* our pid for use in self-generated messages, e.g. on start /* mypid is read-only after the initial fork() */ static int bHadHUP = 0; /* did we have a HUP? */ -static int bParseHOSTNAMEandTAG = 1; /* global config var: should the hostname and tag be +int bParseHOSTNAMEandTAG = 1; /* global config var: should the hostname and tag be * parsed inside message - rgerhards, 2006-03-13 */ static int bFinished = 0; /* used by termination signal handler, read-only except there * is either 0 or the number of the signal that requested the @@ -566,13 +565,7 @@ submitErrMsg(int iErr, uchar *msg) /* rgerhards 2004-11-09: the following is a function that can be used - * to log a message orginating from the syslogd itself. In sysklogd code, - * this is done by simply calling logmsg(). However, logmsg() is changed in - * rsyslog so that it takes a msg "object". So it can no longer be called - * directly. This method here solves the need. It provides an interface that - * allows to construct a locally-generated message. Please note that this - * function here probably is only an interim solution and that we need to - * think on the best way to do this. + * to log a message orginating from the syslogd itself. */ rsRetVal logmsgInternal(int iErr, int pri, uchar *msg, int flags) @@ -601,6 +594,7 @@ logmsgInternal(int iErr, int pri, uchar *msg, int flags) pMsg->iFacility = LOG_FAC(pri); pMsg->iSeverity = LOG_PRI(pri); flags |= INTERNAL_MSG; + pMsg->msgFlags = flags; /* we now check if we should print internal messages out to stderr. This was * suggested by HKS as a way to help people troubleshoot rsyslog configuration @@ -616,12 +610,12 @@ logmsgInternal(int iErr, int pri, uchar *msg, int flags) } if(bHaveMainQueue == 0) { /* not yet in queued mode */ - iminternalAddMsg(pri, pMsg, flags); + iminternalAddMsg(pri, pMsg); } else { /* we have the queue, so we can simply provide the * message to the queue engine. */ - logmsg(pMsg, flags); + submitMsg(pMsg); } finalize_it: RETiRet; @@ -658,373 +652,6 @@ msgConsumer(void __attribute__((unused)) *notNeeded, batch_t *pBatch, int *pbShu } -/* Helper to parseRFCSyslogMsg. This function parses a field up to - * (and including) the SP character after it. The field contents is - * returned in a caller-provided buffer. The parsepointer is advanced - * to after the terminating SP. The caller must ensure that the - * provided buffer is large enough to hold the to be extracted value. - * Returns 0 if everything is fine or 1 if either the field is not - * SP-terminated or any other error occurs. -- rger, 2005-11-24 - * The function now receives the size of the string and makes sure - * that it does not process more than that. The *pLenStr counter is - * updated on exit. -- rgerhards, 2009-09-23 - */ -static int parseRFCField(uchar **pp2parse, uchar *pResult, int *pLenStr) -{ - uchar *p2parse; - int iRet = 0; - - assert(pp2parse != NULL); - assert(*pp2parse != NULL); - assert(pResult != NULL); - - p2parse = *pp2parse; - - /* this is the actual parsing loop */ - while(*pLenStr > 0 && *p2parse != ' ') { - *pResult++ = *p2parse++; - --(*pLenStr); - } - - if(*pLenStr > 0 && *p2parse == ' ') { - ++p2parse; /* eat SP, but only if not at end of string */ - --(*pLenStr); - } else { - iRet = 1; /* there MUST be an SP! */ - } - *pResult = '\0'; - - /* set the new parse pointer */ - *pp2parse = p2parse; - return 0; -} - - -/* Helper to parseRFCSyslogMsg. This function parses the structured - * data field of a message. It does NOT parse inside structured data, - * just gets the field as whole. Parsing the single entities is left - * to other functions. The parsepointer is advanced - * to after the terminating SP. The caller must ensure that the - * provided buffer is large enough to hold the to be extracted value. - * Returns 0 if everything is fine or 1 if either the field is not - * SP-terminated or any other error occurs. -- rger, 2005-11-24 - * The function now receives the size of the string and makes sure - * that it does not process more than that. The *pLenStr counter is - * updated on exit. -- rgerhards, 2009-09-23 - */ -static int parseRFCStructuredData(uchar **pp2parse, uchar *pResult, int *pLenStr) -{ - uchar *p2parse; - int bCont = 1; - int iRet = 0; - int lenStr; - - assert(pp2parse != NULL); - assert(*pp2parse != NULL); - assert(pResult != NULL); - - p2parse = *pp2parse; - lenStr = *pLenStr; - - /* this is the actual parsing loop - * Remeber: structured data starts with [ and includes any characters - * until the first ] followed by a SP. There may be spaces inside - * structured data. There may also be \] inside the structured data, which - * do NOT terminate an element. - */ - if(lenStr == 0 || *p2parse != '[') - return 1; /* this is NOT structured data! */ - - if(*p2parse == '-') { /* empty structured data? */ - *pResult++ = '-'; - ++p2parse; - --lenStr; - } else { - while(bCont) { - if(lenStr < 2) { - /* we now need to check if we have only structured data */ - if(lenStr > 0 && *p2parse == ']') { - *pResult++ = *p2parse; - p2parse++; - lenStr--; - bCont = 0; - } else { - iRet = 1; /* this is not valid! */ - bCont = 0; - } - } else if(*p2parse == '\\' && *(p2parse+1) == ']') { - /* this is escaped, need to copy both */ - *pResult++ = *p2parse++; - *pResult++ = *p2parse++; - lenStr -= 2; - } else if(*p2parse == ']' && *(p2parse+1) == ' ') { - /* found end, just need to copy the ] and eat the SP */ - *pResult++ = *p2parse; - p2parse += 2; - lenStr -= 2; - bCont = 0; - } else { - *pResult++ = *p2parse++; - --lenStr; - } - } - } - - if(lenStr > 0 && *p2parse == ' ') { - ++p2parse; /* eat SP, but only if not at end of string */ - --lenStr; - } else { - iRet = 1; /* there MUST be an SP! */ - } - *pResult = '\0'; - - /* set the new parse pointer */ - *pp2parse = p2parse; - *pLenStr = lenStr; - return 0; -} - -/* parse a RFC5424-formatted syslog message. This function returns - * 0 if processing of the message shall continue and 1 if something - * went wrong and this messe should be ignored. This function has been - * implemented in the effort to support syslog-protocol. Please note that - * the name (parse *RFC*) stems from the hope that syslog-protocol will - * some time become an RFC. Do not confuse this with informational - * RFC 3164 (which is legacy syslog). - * - * currently supported format: - * - * <PRI>VERSION SP TIMESTAMP SP HOSTNAME SP APP-NAME SP PROCID SP MSGID SP [SD-ID]s SP MSG - * - * <PRI> is already stripped when this function is entered. VERSION already - * has been confirmed to be "1", but has NOT been stripped from the message. - * - * rger, 2005-11-24 - */ -int parseRFCSyslogMsg(msg_t *pMsg, int flags) -{ - uchar *p2parse; - uchar *pBuf; - int lenMsg; - int bContParse = 1; - - BEGINfunc - assert(pMsg != NULL); - assert(pMsg->pszRawMsg != NULL); - p2parse = pMsg->pszRawMsg + pMsg->offAfterPRI; /* point to start of text, after PRI */ - lenMsg = pMsg->iLenRawMsg - pMsg->offAfterPRI; - - /* do a sanity check on the version and eat it (the caller checked this already) */ - assert(p2parse[0] == '1' && p2parse[1] == ' '); - p2parse += 2; - lenMsg -= 2; - - /* Now get us some memory we can use as a work buffer while parsing. - * We simply allocated a buffer sufficiently large to hold all of the - * message, so we can not run into any troubles. I think this is - * more wise then to use individual buffers. - */ - if((pBuf = MALLOC(sizeof(uchar) * (lenMsg + 1))) == NULL) - return 1; - - /* IMPORTANT NOTE: - * Validation is not actually done below nor are any errors handled. I have - * NOT included this for the current proof of concept. However, it is strongly - * advisable to add it when this code actually goes into production. - * rgerhards, 2005-11-24 - */ - - /* TIMESTAMP */ - if(datetime.ParseTIMESTAMP3339(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) { - if(flags & IGNDATE) { - /* we need to ignore the msg data, so simply copy over reception date */ - memcpy(&pMsg->tTIMESTAMP, &pMsg->tRcvdAt, sizeof(struct syslogTime)); - } - } else { - DBGPRINTF("no TIMESTAMP detected!\n"); - bContParse = 0; - } - - /* HOSTNAME */ - if(bContParse) { - parseRFCField(&p2parse, pBuf, &lenMsg); - MsgSetHOSTNAME(pMsg, pBuf, ustrlen(pBuf)); - } - - /* APP-NAME */ - if(bContParse) { - parseRFCField(&p2parse, pBuf, &lenMsg); - MsgSetAPPNAME(pMsg, (char*)pBuf); - } - - /* PROCID */ - if(bContParse) { - parseRFCField(&p2parse, pBuf, &lenMsg); - MsgSetPROCID(pMsg, (char*)pBuf); - } - - /* MSGID */ - if(bContParse) { - parseRFCField(&p2parse, pBuf, &lenMsg); - MsgSetMSGID(pMsg, (char*)pBuf); - } - - /* STRUCTURED-DATA */ - if(bContParse) { - parseRFCStructuredData(&p2parse, pBuf, &lenMsg); - MsgSetStructuredData(pMsg, (char*)pBuf); - } - - /* MSG */ - MsgSetMSGoffs(pMsg, p2parse - pMsg->pszRawMsg); - - free(pBuf); - ENDfunc - return 0; /* all ok */ -} - - -/* parse a legay-formatted syslog message. This function returns - * 0 if processing of the message shall continue and 1 if something - * went wrong and this messe should be ignored. This function has been - * implemented in the effort to support syslog-protocol. - * rger, 2005-11-24 - * As of 2006-01-10, I am removing the logic to continue parsing only - * when a valid TIMESTAMP is detected. Validity of other fields already - * is ignored. This is due to the fact that the parser has grown smarter - * and is now more able to understand different dialects of the syslog - * message format. I do not expect any bad side effects of this change, - * but I thought I log it in this comment. - * rgerhards, 2006-01-10 - */ -int parseLegacySyslogMsg(msg_t *pMsg, int flags) -{ - uchar *p2parse; - int lenMsg; - int bTAGCharDetected; - int i; /* general index for parsing */ - uchar bufParseTAG[CONF_TAG_MAXSIZE]; - uchar bufParseHOSTNAME[CONF_TAG_HOSTNAME]; - BEGINfunc - - assert(pMsg != NULL); - assert(pMsg->pszRawMsg != NULL); - lenMsg = pMsg->iLenRawMsg - (pMsg->offAfterPRI + 1); - p2parse = pMsg->pszRawMsg + pMsg->offAfterPRI; /* point to start of text, after PRI */ - - /* Check to see if msg contains a timestamp. We start by assuming - * that the message timestamp is the time of reception (which we - * generated ourselfs and then try to actually find one inside the - * message. There we go from high-to low precison and are done - * when we find a matching one. -- rgerhards, 2008-09-16 - */ - if(datetime.ParseTIMESTAMP3339(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) { - /* we are done - parse pointer is moved by ParseTIMESTAMP3339 */; - } else if(datetime.ParseTIMESTAMP3164(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) { - /* we are done - parse pointer is moved by ParseTIMESTAMP3164 */; - } else if(*p2parse == ' ' && lenMsg > 1) { /* try to see if it is slighly malformed - HP procurve seems to do that sometimes */ - ++p2parse; /* move over space */ - --lenMsg; - if(datetime.ParseTIMESTAMP3164(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) { - /* indeed, we got it! */ - /* we are done - parse pointer is moved by ParseTIMESTAMP3164 */; - } else {/* parse pointer needs to be restored, as we moved it off-by-one - * for this try. - */ - --p2parse; - ++lenMsg; - } - } - - if(flags & IGNDATE) { - /* we need to ignore the msg data, so simply copy over reception date */ - memcpy(&pMsg->tTIMESTAMP, &pMsg->tRcvdAt, sizeof(struct syslogTime)); - } - - /* rgerhards, 2006-03-13: next, we parse the hostname and tag. But we - * do this only when the user has not forbidden this. I now introduce some - * code that allows a user to configure rsyslogd to treat the rest of the - * message as MSG part completely. In this case, the hostname will be the - * machine that we received the message from and the tag will be empty. This - * is meant to be an interim solution, but for now it is in the code. - */ - if(bParseHOSTNAMEandTAG && !(flags & INTERNAL_MSG)) { - /* parse HOSTNAME - but only if this is network-received! - * rger, 2005-11-14: we still have a problem with BSD messages. These messages - * do NOT include a host name. In most cases, this leads to the TAG to be treated - * as hostname and the first word of the message as the TAG. Clearly, this is not - * of advantage ;) I think I have now found a way to handle this situation: there - * are certain characters which are frequently used in TAG (e.g. ':'), which are - * *invalid* in host names. So while parsing the hostname, I check for these characters. - * If I find them, I set a simple flag but continue. After parsing, I check the flag. - * If it was set, then we most probably do not have a hostname but a TAG. Thus, I change - * the fields. I think this logic shall work with any type of syslog message. - * rgerhards, 2009-06-23: and I now have extended this logic to every character - * that is not a valid hostname. - */ - bTAGCharDetected = 0; - if(lenMsg > 0 && flags & PARSE_HOSTNAME) { - i = 0; - while(i < lenMsg && (isalnum(p2parse[i]) || p2parse[i] == '.' || p2parse[i] == '.' - || p2parse[i] == '_' || p2parse[i] == '-') && i < CONF_TAG_MAXSIZE) { - bufParseHOSTNAME[i] = p2parse[i]; - ++i; - } - - if(i > 0 && p2parse[i] == ' ' && isalnum(p2parse[i-1])) { - /* we got a hostname! */ - p2parse += i + 1; /* "eat" it (including SP delimiter) */ - lenMsg -= i + 1; - bufParseHOSTNAME[i] = '\0'; - MsgSetHOSTNAME(pMsg, bufParseHOSTNAME, i); - } - } - - /* now parse TAG - that should be present in message from all sources. - * This code is somewhat not compliant with RFC 3164. As of 3164, - * the TAG field is ended by any non-alphanumeric character. In - * practice, however, the TAG often contains dashes and other things, - * which would end the TAG. So it is not desirable. As such, we only - * accept colon and SP to be terminators. Even there is a slight difference: - * a colon is PART of the TAG, while a SP is NOT part of the tag - * (it is CONTENT). Starting 2008-04-04, we have removed the 32 character - * size limit (from RFC3164) on the tag. This had bad effects on existing - * envrionments, as sysklogd didn't obey it either (probably another bug - * in RFC3164...). We now receive the full size, but will modify the - * outputs so that only 32 characters max are used by default. - */ - i = 0; - while(lenMsg > 0 && *p2parse != ':' && *p2parse != ' ' && i < CONF_TAG_MAXSIZE) { - bufParseTAG[i++] = *p2parse++; - --lenMsg; - } - if(lenMsg > 0 && *p2parse == ':') { - ++p2parse; - --lenMsg; - bufParseTAG[i++] = ':'; - } - - /* no TAG can only be detected if the message immediatly ends, in which case an empty TAG - * is considered OK. So we do not need to check for empty TAG. -- rgerhards, 2009-06-23 - */ - bufParseTAG[i] = '\0'; /* terminate string */ - MsgSetTAG(pMsg, bufParseTAG, i); - } else {/* we enter this code area when the user has instructed rsyslog NOT - * to parse HOSTNAME and TAG - rgerhards, 2006-03-13 - */ - if(!(flags & INTERNAL_MSG)) { - DBGPRINTF("HOSTNAME and TAG not parsed by user configuraton.\n"); - } - } - - /* The rest is the actual MSG */ - MsgSetMSGoffs(pMsg, p2parse - pMsg->pszRawMsg); - - ENDfunc - return 0; /* all ok */ -} - - /* submit a message to the main message queue. This is primarily * a hook to prevent the need for callers to know about the main message queue * rgerhards, 2008-02-13 @@ -1078,65 +705,6 @@ finalize_it: } -/* Log a message to the appropriate log files, users, etc. based on - * the priority. - * rgerhards 2004-11-08: actually, this also decodes all but the PRI part. - * rgerhards 2004-11-09: ... but only, if syslogd could properly be initialized - * if not, we use emergency logging to the console and in - * this case, no further decoding happens. - * changed to no longer receive a plain message but a msg object instead. - * rgerhards-2004-11-16: OK, we are now up to another change... This method - * actually needs to PARSE the message. How exactly this needs to happen depends on - * a number of things. Most importantly, it depends on the source. For example, - * locally received messages (SOURCE_UNIXAF) do NOT have a hostname in them. So - * we need to treat them differntly form network-received messages which have. - * Well, actually not all network-received message really have a hostname. We - * can just hope they do, but we can not be sure. So this method tries to find - * whatever can be found in the message and uses that... Obviously, there is some - * potential for misinterpretation, which we simply can not solve under the - * circumstances given. - */ -static void -logmsg(msg_t *pMsg, int flags) -{ - char *msg; - - BEGINfunc - assert(pMsg != NULL); - assert(pMsg->pszRawMsg != NULL); - - msg = (char*) pMsg->pszRawMsg + pMsg->offAfterPRI; /* point to start of text, after PRI */ - DBGPRINTF("logmsg: flags %x, from '%s', msg %s\n", flags, getRcvFrom(pMsg), msg); - - /* rger 2005-11-24 (happy thanksgiving!): we now need to check if we have - * a traditional syslog message or one formatted according to syslog-protocol. - * We need to apply different parsers depending on that. We use the - * -protocol VERSION field for the detection. - */ - if(msg[0] == '1' && msg[1] == ' ') { - DBGPRINTF("Message has syslog-protocol format.\n"); - setProtocolVersion(pMsg, 1); - if(parseRFCSyslogMsg(pMsg, flags) == 1) { - msgDestruct(&pMsg); - return; - } - } else { /* we have legacy syslog */ - DBGPRINTF("Message has legacy syslog format.\n"); - setProtocolVersion(pMsg, 0); - if(parseLegacySyslogMsg(pMsg, flags) == 1) { - msgDestruct(&pMsg); - return; - } - } - - /* ---------------------- END PARSING ---------------- */ - - /* now submit the message to the main queue - then we are done */ - pMsg->msgFlags = flags; - MsgPrepareEnqueue(pMsg); - qqueueEnqObj(pMsgQueue, pMsg->flowCtlType, (void*) pMsg); - ENDfunc -} static void @@ -2213,11 +1781,10 @@ void sigttin_handler() static void processImInternal(void) { int iPri; - int iFlags; msg_t *pMsg; - while(iminternalRemoveMsg(&iPri, &pMsg, &iFlags) == RS_RET_OK) { - logmsg(pMsg, iFlags); + while(iminternalRemoveMsg(&iPri, &pMsg) == RS_RET_OK) { + submitMsg(pMsg); } } |