diff options
author | Rainer Gerhards <rgerhards@adiscon.com> | 2009-11-04 10:40:27 +0100 |
---|---|---|
committer | Rainer Gerhards <rgerhards@adiscon.com> | 2009-11-04 10:40:27 +0100 |
commit | 1b7f5c54684db29c096e09238648a45dce78ebee (patch) | |
tree | 7db5f2d5ea45b53109b0f9f585fca845fa0773fe /runtime/parser.c | |
parent | b41e0d51f54a89f489bbf1c1bf5f85d576ae4049 (diff) | |
download | rsyslog-1b7f5c54684db29c096e09238648a45dce78ebee.tar.gz rsyslog-1b7f5c54684db29c096e09238648a45dce78ebee.tar.xz rsyslog-1b7f5c54684db29c096e09238648a45dce78ebee.zip |
moved rfc3164/5424 code to new parser modules
another milestone commit: the program works, the new interface
is used, some more cleanup is needed and the per-ruleset config
options are still missing. But we are getting closer...
Diffstat (limited to 'runtime/parser.c')
-rw-r--r-- | runtime/parser.c | 476 |
1 files changed, 69 insertions, 407 deletions
diff --git a/runtime/parser.c b/runtime/parser.c index f59d1974..2c764739 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -40,6 +40,7 @@ #include "datetime.h" #include "errmsg.h" #include "parser.h" +#include "ruleset.h" #include "unicode-helper.h" #include "dirty.h" #include "cfsysline.h" @@ -52,21 +53,25 @@ DEFobjStaticHelpers DEFobjCurrIf(glbl) DEFobjCurrIf(errmsg) DEFobjCurrIf(datetime) +DEFobjCurrIf(ruleset) /* static data */ -static int bParseHOSTNAMEandTAG; /* cache for the equally-named global param - performance enhancement */ /* config data */ static uchar cCCEscapeChar = '#';/* character to be used to start an escape sequence for control chars */ static int bEscapeCCOnRcv = 1; /* escape control characters on reception: 0 - no, 1 - yes */ static int bDropTrailingLF = 1; /* drop trailing LF's on reception? */ -/* we create a small helper list of parsers so that we can obtain their - * handles if the config needs one. This is also used to unload all modules on - * shutdown. +/* This is the list of all parsers known to us. + * This is also used to unload all modules on shutdown. */ parserList_t *pParsLstRoot = NULL; +/* this is the list of the default parsers, to be used if no others + * are specified. + */ +parserList_t *pDfltParsLst = NULL; + /* intialize (but NOT allocate) a parser list. Primarily meant as a hook * which can be used to extend the list in the future. So far, just sets @@ -139,6 +144,27 @@ finalize_it: /* --- END helper functions for parser list handling --- */ +/* Add a an already existing parser to the default list. As usual, order + * of calls is important (most importantly, that means the legacy parser, + * which can process everything, MUST be added last!). + * rgerhards, 2009-11-04 + */ +static rsRetVal +AddDfltParser(uchar *pName) +{ + parser_t *pParser; + DEFiRet; + + CHKiRet(FindParser(&pParser, pName)); + CHKiRet(AddParserToList(&pDfltParsLst, pParser)); + dbgprintf("Parser '%s' added to default parser set.\n", pName); + +finalize_it: + RETiRet; +} + + + BEGINobjConstruct(parser) /* be sure to specify the object type also in END macro! */ ENDobjConstruct(parser) @@ -152,7 +178,7 @@ rsRetVal parserConstructFinalize(parser_t *pThis) ISOBJ_TYPE_assert(pThis, parser); CHKiRet(AddParserToList(&pParsLstRoot, pThis)); - DBGPRINTF("parser '%s' added to list of available parser\n", pThis->pName); + DBGPRINTF("Parser '%s' added to list of available parsers.\n", pThis->pName); finalize_it: RETiRet; @@ -164,383 +190,6 @@ CODESTARTobjDestruct(parser) //TODO: free module! ENDobjDestruct(parser) -/***************************RFC 5425 PARSER ******************************************************/ - - -/* Helper to parseRFCSyslogMsg. This function parses a field up to - * (and including) the SP character after it. The field contents is - * returned in a caller-provided buffer. The parsepointer is advanced - * to after the terminating SP. The caller must ensure that the - * provided buffer is large enough to hold the to be extracted value. - * Returns 0 if everything is fine or 1 if either the field is not - * SP-terminated or any other error occurs. -- rger, 2005-11-24 - * The function now receives the size of the string and makes sure - * that it does not process more than that. The *pLenStr counter is - * updated on exit. -- rgerhards, 2009-09-23 - */ -static int parseRFCField(uchar **pp2parse, uchar *pResult, int *pLenStr) -{ - uchar *p2parse; - int iRet = 0; - - assert(pp2parse != NULL); - assert(*pp2parse != NULL); - assert(pResult != NULL); - - p2parse = *pp2parse; - - /* this is the actual parsing loop */ - while(*pLenStr > 0 && *p2parse != ' ') { - *pResult++ = *p2parse++; - --(*pLenStr); - } - - if(*pLenStr > 0 && *p2parse == ' ') { - ++p2parse; /* eat SP, but only if not at end of string */ - --(*pLenStr); - } else { - iRet = 1; /* there MUST be an SP! */ - } - *pResult = '\0'; - - /* set the new parse pointer */ - *pp2parse = p2parse; - return 0; -} - - -/* Helper to parseRFCSyslogMsg. This function parses the structured - * data field of a message. It does NOT parse inside structured data, - * just gets the field as whole. Parsing the single entities is left - * to other functions. The parsepointer is advanced - * to after the terminating SP. The caller must ensure that the - * provided buffer is large enough to hold the to be extracted value. - * Returns 0 if everything is fine or 1 if either the field is not - * SP-terminated or any other error occurs. -- rger, 2005-11-24 - * The function now receives the size of the string and makes sure - * that it does not process more than that. The *pLenStr counter is - * updated on exit. -- rgerhards, 2009-09-23 - */ -static int parseRFCStructuredData(uchar **pp2parse, uchar *pResult, int *pLenStr) -{ - uchar *p2parse; - int bCont = 1; - int iRet = 0; - int lenStr; - - assert(pp2parse != NULL); - assert(*pp2parse != NULL); - assert(pResult != NULL); - - p2parse = *pp2parse; - lenStr = *pLenStr; - - /* this is the actual parsing loop - * Remeber: structured data starts with [ and includes any characters - * until the first ] followed by a SP. There may be spaces inside - * structured data. There may also be \] inside the structured data, which - * do NOT terminate an element. - */ - if(lenStr == 0 || *p2parse != '[') - return 1; /* this is NOT structured data! */ - - if(*p2parse == '-') { /* empty structured data? */ - *pResult++ = '-'; - ++p2parse; - --lenStr; - } else { - while(bCont) { - if(lenStr < 2) { - /* we now need to check if we have only structured data */ - if(lenStr > 0 && *p2parse == ']') { - *pResult++ = *p2parse; - p2parse++; - lenStr--; - bCont = 0; - } else { - iRet = 1; /* this is not valid! */ - bCont = 0; - } - } else if(*p2parse == '\\' && *(p2parse+1) == ']') { - /* this is escaped, need to copy both */ - *pResult++ = *p2parse++; - *pResult++ = *p2parse++; - lenStr -= 2; - } else if(*p2parse == ']' && *(p2parse+1) == ' ') { - /* found end, just need to copy the ] and eat the SP */ - *pResult++ = *p2parse; - p2parse += 2; - lenStr -= 2; - bCont = 0; - } else { - *pResult++ = *p2parse++; - --lenStr; - } - } - } - - if(lenStr > 0 && *p2parse == ' ') { - ++p2parse; /* eat SP, but only if not at end of string */ - --lenStr; - } else { - iRet = 1; /* there MUST be an SP! */ - } - *pResult = '\0'; - - /* set the new parse pointer */ - *pp2parse = p2parse; - *pLenStr = lenStr; - return 0; -} - -/* parse a RFC5424-formatted syslog message. This function returns - * 0 if processing of the message shall continue and 1 if something - * went wrong and this messe should be ignored. This function has been - * implemented in the effort to support syslog-protocol. Please note that - * the name (parse *RFC*) stems from the hope that syslog-protocol will - * some time become an RFC. Do not confuse this with informational - * RFC 3164 (which is legacy syslog). - * - * currently supported format: - * - * <PRI>VERSION SP TIMESTAMP SP HOSTNAME SP APP-NAME SP PROCID SP MSGID SP [SD-ID]s SP MSG - * - * <PRI> is already stripped when this function is entered. VERSION already - * has been confirmed to be "1", but has NOT been stripped from the message. - * - * rger, 2005-11-24 - */ -static int parseRFCSyslogMsg(msg_t *pMsg, int flags) -{ - uchar *p2parse; - uchar *pBuf; - int lenMsg; - int bContParse = 1; - - BEGINfunc - assert(pMsg != NULL); - assert(pMsg->pszRawMsg != NULL); - p2parse = pMsg->pszRawMsg + pMsg->offAfterPRI; /* point to start of text, after PRI */ - lenMsg = pMsg->iLenRawMsg - pMsg->offAfterPRI; - - /* do a sanity check on the version and eat it (the caller checked this already) */ - assert(p2parse[0] == '1' && p2parse[1] == ' '); - p2parse += 2; - lenMsg -= 2; - - /* Now get us some memory we can use as a work buffer while parsing. - * We simply allocated a buffer sufficiently large to hold all of the - * message, so we can not run into any troubles. I think this is - * more wise then to use individual buffers. - */ - if((pBuf = MALLOC(sizeof(uchar) * (lenMsg + 1))) == NULL) - return 1; - - /* IMPORTANT NOTE: - * Validation is not actually done below nor are any errors handled. I have - * NOT included this for the current proof of concept. However, it is strongly - * advisable to add it when this code actually goes into production. - * rgerhards, 2005-11-24 - */ - - /* TIMESTAMP */ - if(datetime.ParseTIMESTAMP3339(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) { - if(flags & IGNDATE) { - /* we need to ignore the msg data, so simply copy over reception date */ - memcpy(&pMsg->tTIMESTAMP, &pMsg->tRcvdAt, sizeof(struct syslogTime)); - } - } else { - DBGPRINTF("no TIMESTAMP detected!\n"); - bContParse = 0; - } - - /* HOSTNAME */ - if(bContParse) { - parseRFCField(&p2parse, pBuf, &lenMsg); - MsgSetHOSTNAME(pMsg, pBuf, ustrlen(pBuf)); - } - - /* APP-NAME */ - if(bContParse) { - parseRFCField(&p2parse, pBuf, &lenMsg); - MsgSetAPPNAME(pMsg, (char*)pBuf); - } - - /* PROCID */ - if(bContParse) { - parseRFCField(&p2parse, pBuf, &lenMsg); - MsgSetPROCID(pMsg, (char*)pBuf); - } - - /* MSGID */ - if(bContParse) { - parseRFCField(&p2parse, pBuf, &lenMsg); - MsgSetMSGID(pMsg, (char*)pBuf); - } - - /* STRUCTURED-DATA */ - if(bContParse) { - parseRFCStructuredData(&p2parse, pBuf, &lenMsg); - MsgSetStructuredData(pMsg, (char*)pBuf); - } - - /* MSG */ - MsgSetMSGoffs(pMsg, p2parse - pMsg->pszRawMsg); - - free(pBuf); - ENDfunc - return 0; /* all ok */ -} - - -/*********************** END RFC5425 PARSER ******************************************/ - -/***************************RFC 5425 PARSER ******************************************************/ - - -/* parse a legay-formatted syslog message. This function returns - * 0 if processing of the message shall continue and 1 if something - * went wrong and this messe should be ignored. This function has been - * implemented in the effort to support syslog-protocol. - * rger, 2005-11-24 - * As of 2006-01-10, I am removing the logic to continue parsing only - * when a valid TIMESTAMP is detected. Validity of other fields already - * is ignored. This is due to the fact that the parser has grown smarter - * and is now more able to understand different dialects of the syslog - * message format. I do not expect any bad side effects of this change, - * but I thought I log it in this comment. - * rgerhards, 2006-01-10 - */ -static int parseLegacySyslogMsg(msg_t *pMsg, int flags) -{ - uchar *p2parse; - int lenMsg; - int bTAGCharDetected; - int i; /* general index for parsing */ - uchar bufParseTAG[CONF_TAG_MAXSIZE]; - uchar bufParseHOSTNAME[CONF_TAG_HOSTNAME]; - BEGINfunc - - assert(pMsg != NULL); - assert(pMsg->pszRawMsg != NULL); - lenMsg = pMsg->iLenRawMsg - (pMsg->offAfterPRI + 1); - p2parse = pMsg->pszRawMsg + pMsg->offAfterPRI; /* point to start of text, after PRI */ - - /* Check to see if msg contains a timestamp. We start by assuming - * that the message timestamp is the time of reception (which we - * generated ourselfs and then try to actually find one inside the - * message. There we go from high-to low precison and are done - * when we find a matching one. -- rgerhards, 2008-09-16 - */ - if(datetime.ParseTIMESTAMP3339(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) { - /* we are done - parse pointer is moved by ParseTIMESTAMP3339 */; - } else if(datetime.ParseTIMESTAMP3164(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) { - /* we are done - parse pointer is moved by ParseTIMESTAMP3164 */; - } else if(*p2parse == ' ' && lenMsg > 1) { /* try to see if it is slighly malformed - HP procurve seems to do that sometimes */ - ++p2parse; /* move over space */ - --lenMsg; - if(datetime.ParseTIMESTAMP3164(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) { - /* indeed, we got it! */ - /* we are done - parse pointer is moved by ParseTIMESTAMP3164 */; - } else {/* parse pointer needs to be restored, as we moved it off-by-one - * for this try. - */ - --p2parse; - ++lenMsg; - } - } - - if(flags & IGNDATE) { - /* we need to ignore the msg data, so simply copy over reception date */ - memcpy(&pMsg->tTIMESTAMP, &pMsg->tRcvdAt, sizeof(struct syslogTime)); - } - - /* rgerhards, 2006-03-13: next, we parse the hostname and tag. But we - * do this only when the user has not forbidden this. I now introduce some - * code that allows a user to configure rsyslogd to treat the rest of the - * message as MSG part completely. In this case, the hostname will be the - * machine that we received the message from and the tag will be empty. This - * is meant to be an interim solution, but for now it is in the code. - */ - if(bParseHOSTNAMEandTAG && !(flags & INTERNAL_MSG)) { - /* parse HOSTNAME - but only if this is network-received! - * rger, 2005-11-14: we still have a problem with BSD messages. These messages - * do NOT include a host name. In most cases, this leads to the TAG to be treated - * as hostname and the first word of the message as the TAG. Clearly, this is not - * of advantage ;) I think I have now found a way to handle this situation: there - * are certain characters which are frequently used in TAG (e.g. ':'), which are - * *invalid* in host names. So while parsing the hostname, I check for these characters. - * If I find them, I set a simple flag but continue. After parsing, I check the flag. - * If it was set, then we most probably do not have a hostname but a TAG. Thus, I change - * the fields. I think this logic shall work with any type of syslog message. - * rgerhards, 2009-06-23: and I now have extended this logic to every character - * that is not a valid hostname. - */ - bTAGCharDetected = 0; - if(lenMsg > 0 && flags & PARSE_HOSTNAME) { - i = 0; - while(i < lenMsg && (isalnum(p2parse[i]) || p2parse[i] == '.' || p2parse[i] == '.' - || p2parse[i] == '_' || p2parse[i] == '-') && i < CONF_TAG_MAXSIZE) { - bufParseHOSTNAME[i] = p2parse[i]; - ++i; - } - - if(i > 0 && p2parse[i] == ' ' && isalnum(p2parse[i-1])) { - /* we got a hostname! */ - p2parse += i + 1; /* "eat" it (including SP delimiter) */ - lenMsg -= i + 1; - bufParseHOSTNAME[i] = '\0'; - MsgSetHOSTNAME(pMsg, bufParseHOSTNAME, i); - } - } - - /* now parse TAG - that should be present in message from all sources. - * This code is somewhat not compliant with RFC 3164. As of 3164, - * the TAG field is ended by any non-alphanumeric character. In - * practice, however, the TAG often contains dashes and other things, - * which would end the TAG. So it is not desirable. As such, we only - * accept colon and SP to be terminators. Even there is a slight difference: - * a colon is PART of the TAG, while a SP is NOT part of the tag - * (it is CONTENT). Starting 2008-04-04, we have removed the 32 character - * size limit (from RFC3164) on the tag. This had bad effects on existing - * envrionments, as sysklogd didn't obey it either (probably another bug - * in RFC3164...). We now receive the full size, but will modify the - * outputs so that only 32 characters max are used by default. - */ - i = 0; - while(lenMsg > 0 && *p2parse != ':' && *p2parse != ' ' && i < CONF_TAG_MAXSIZE) { - bufParseTAG[i++] = *p2parse++; - --lenMsg; - } - if(lenMsg > 0 && *p2parse == ':') { - ++p2parse; - --lenMsg; - bufParseTAG[i++] = ':'; - } - - /* no TAG can only be detected if the message immediatly ends, in which case an empty TAG - * is considered OK. So we do not need to check for empty TAG. -- rgerhards, 2009-06-23 - */ - bufParseTAG[i] = '\0'; /* terminate string */ - MsgSetTAG(pMsg, bufParseTAG, i); - } else {/* we enter this code area when the user has instructed rsyslog NOT - * to parse HOSTNAME and TAG - rgerhards, 2006-03-13 - */ - if(!(flags & INTERNAL_MSG)) { - DBGPRINTF("HOSTNAME and TAG not parsed by user configuraton.\n"); - } - } - - /* The rest is the actual MSG */ - MsgSetMSGoffs(pMsg, p2parse - pMsg->pszRawMsg); - - ENDfunc - return 0; /* all ok */ -} - - -/***************************END RFC 5425 PARSER ******************************************************/ - /* uncompress a received message if it is compressed. * pMsg->pszRawMsg buffer is updated. @@ -645,10 +294,6 @@ SanitizeMsg(msg_t *pMsg) assert(pMsg != NULL); assert(pMsg->iLenRawMsg > 0); -# ifdef USE_NETZIP - CHKiRet(uncompressMessage(pMsg)); -# endif - pszMsg = pMsg->pszRawMsg; lenMsg = pMsg->iLenRawMsg; @@ -740,15 +385,22 @@ finalize_it: static rsRetVal ParseMsg(msg_t *pMsg) { - DEFiRet; uchar *msg; int pri; int lenMsg; int iPriText; + rsRetVal localRet; + parserList_t *pParserList; + static int iErrMsgRateLimiter = 0; + DEFiRet; if(pMsg->iLenRawMsg == 0) ABORT_FINALIZE(RS_RET_EMPTY_MSG); +# ifdef USE_NETZIP + CHKiRet(uncompressMessage(pMsg)); +# endif + CHKiRet(SanitizeMsg(pMsg)); /* we needed to sanitize first, because we otherwise do not have a C-string we can print... */ @@ -777,25 +429,35 @@ ParseMsg(msg_t *pMsg) pMsg->iSeverity = LOG_PRI(pri); MsgSetAfterPRIOffs(pMsg, msg - pMsg->pszRawMsg); - /* rger 2005-11-24 (happy thanksgiving!): we now need to check if we have - * a traditional syslog message or one formatted according to syslog-protocol. - * We need to apply different parsers depending on that. We use the - * -protocol VERSION field for the detection. + /* we now need to go through our list of parsers and see which one is capable of + * parsing the message. */ - if(msg[0] == '1' && msg[1] == ' ') { - dbgprintf("Message has syslog-protocol format.\n"); - setProtocolVersion(pMsg, 1); - if(parseRFCSyslogMsg(pMsg, pMsg->msgFlags) == 1) { - msgDestruct(&pMsg); - ABORT_FINALIZE(RS_RET_ERR); // TODO: we need to handle these cases! - } - } else { /* we have legacy syslog */ - dbgprintf("Message has legacy syslog format.\n"); - setProtocolVersion(pMsg, 0); - if(parseLegacySyslogMsg(pMsg, pMsg->msgFlags) == 1) { - msgDestruct(&pMsg); - ABORT_FINALIZE(RS_RET_ERR); // TODO: we need to handle these cases! + pParserList = ruleset.GetParserList(pMsg); + if(pParserList == NULL) + pParserList = pDfltParsLst; + DBGPRINTF("Using parser list %p%s.\n", pParserList, + (pParserList == pDfltParsLst) ? " (the default list)" : ""); + + while(pParserList != NULL) { + localRet = pParserList->pParser->pModule->mod.pm.parse(pMsg); + if(localRet != RS_RET_COULD_NOT_PARSE) + break; + pParserList = pParserList->pNext; + } + + /* We need to log a warning message and drop the message if we did not find a parser. + * Note that we log at most the first 1000 message, as this may very well be a problem + * that causes a message generation loop. We do not synchronize that counter, it doesn't + * matter if we log a handful messages more than we should... + */ + if(localRet != RS_RET_OK) { + if(++iErrMsgRateLimiter > 1000) { + errmsg.LogError(0, localRet, "Error: one message could not be processed by " + "any parser, message is being discarded (start of raw msg: '%50s')", + pMsg->pszRawMsg); } + DBGPRINTF("No parser could process the message (state %d), we need to discard it.\n", localRet); + ABORT_FINALIZE(localRet); } /* finalize message object */ @@ -877,6 +539,7 @@ CODESTARTobjQueryInterface(parser) pIf->SanitizeMsg = SanitizeMsg; pIf->InitParserList = InitParserList; pIf->AddParserToList = AddParserToList; + pIf->AddDfltParser = AddDfltParser; pIf->FindParser = FindParser; finalize_it: ENDobjQueryInterface(parser) @@ -902,13 +565,11 @@ resetConfigVariables(uchar __attribute__((unused)) *pp, void __attribute__((unus * rgerhards, 2009-11-02 */ BEGINObjClassInit(parser, 1, OBJ_IS_CORE_MODULE) /* class, version */ -//BEGINAbstractObjClassInit(parser, 1, OBJ_IS_CORE_MODULE) /* class, version */ /* request objects we use */ CHKiRet(objUse(glbl, CORE_COMPONENT)); CHKiRet(objUse(errmsg, CORE_COMPONENT)); CHKiRet(objUse(datetime, CORE_COMPONENT)); - - bParseHOSTNAMEandTAG = glbl.GetParseHOSTNAMEandTAG(); /* cache value, is set only during rsyslogd option processing */ + CHKiRet(objUse(ruleset, CORE_COMPONENT)); CHKiRet(regCfSysLineHdlr((uchar *)"controlcharacterescapeprefix", 0, eCmdHdlrGetChar, NULL, &cCCEscapeChar, NULL)); CHKiRet(regCfSysLineHdlr((uchar *)"droptrailinglfonreception", 0, eCmdHdlrBinary, NULL, &bDropTrailingLF, NULL)); @@ -916,5 +577,6 @@ BEGINObjClassInit(parser, 1, OBJ_IS_CORE_MODULE) /* class, version */ CHKiRet(regCfSysLineHdlr((uchar *)"resetconfigvariables", 1, eCmdHdlrCustomHandler, resetConfigVariables, NULL, NULL)); InitParserList(&pParsLstRoot); + InitParserList(&pDfltParsLst); ENDObjClassInit(parser) |