From ace4f2f75202aec39449dac11b9eb1deca7428d7 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 8 Oct 2008 18:55:11 +0200 Subject: reordered imudp processing. Message parsing is now done as part of main message queue worker processing (was part of the input thread) This should also improve performance, as potentially more work is done in parallel. --- runtime/parser.c | 314 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 314 insertions(+) create mode 100644 runtime/parser.c (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c new file mode 100644 index 00000000..8c4272a0 --- /dev/null +++ b/runtime/parser.c @@ -0,0 +1,314 @@ +/* parser.c + * This module contains functions for message parsers. It still needs to be + * converted into an object (and much extended). + * + * Module begun 2008-10-09 by Rainer Gerhards (based on previous code from syslogd.c) + * + * Copyright 2008 Rainer Gerhards and Adiscon GmbH. + * + * This file is part of the rsyslog runtime library. + * + * The rsyslog runtime library is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * The rsyslog runtime library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with the rsyslog runtime library. If not, see . + * + * A copy of the GPL can be found in the file "COPYING" in this distribution. + * A copy of the LGPL can be found in the file "COPYING.LESSER" in this distribution. + */ +#include "config.h" +#include +#include +#include +#ifdef USE_NETZIP +#include +#endif + +#include "rsyslog.h" +#include "dirty.h" +#include "msg.h" +#include "obj.h" +#include "errmsg.h" + +/* some defines */ +#define DEFUPRI (LOG_USER|LOG_NOTICE) + +#warning "msg object must be updated with new property for persisting the queue!" +/* definitions for objects we access */ +DEFobjStaticHelpers +DEFobjCurrIf(glbl) +DEFobjCurrIf(errmsg) + +/* static data */ + + +/* this is a dummy class init + */ +rsRetVal parserClassInit(void) +{ + DEFiRet; + + /* request objects we use */ + CHKiRet(objGetObjInterface(&obj)); /* this provides the root pointer for all other queries */ + CHKiRet(objUse(glbl, CORE_COMPONENT)); + CHKiRet(objUse(errmsg, CORE_COMPONENT)); +// TODO: free components! see action.c +finalize_it: + RETiRet; +} + + +/* uncompress a received message if it is compressed. + * pMsg->pszRawMsg buffer is updated. + * rgerhards, 2008-10-09 + */ +static inline rsRetVal uncompressMessage(msg_t *pMsg) +{ + DEFiRet; +# ifdef USE_NETZIP + uchar *deflateBuf = NULL; + uLongf iLenDefBuf; + uchar *pszMsg; + size_t lenMsg; + + assert(pMsg != NULL); + pszMsg = pMsg->pszRawMsg; + lenMsg = pMsg->iLenRawMsg; + + /* we first need to check if we have a compressed record. If so, + * we must decompress it. + */ + if(lenMsg > 0 && *pszMsg == 'z') { /* compressed data present? (do NOT change order if conditions!) */ + /* we have compressed data, so let's deflate it. We support a maximum + * message size of iMaxLine. If it is larger, an error message is logged + * and the message is dropped. We do NOT try to decompress larger messages + * as such might be used for denial of service. It might happen to later + * builds that such functionality be added as an optional, operator-configurable + * feature. + */ + int ret; + iLenDefBuf = glbl.GetMaxLine(); + CHKmalloc(deflateBuf = malloc(sizeof(uchar) * (iLenDefBuf + 1))); + ret = uncompress((uchar *) deflateBuf, &iLenDefBuf, (uchar *) pszMsg+1, lenMsg-1); + DBGPRINTF("Compressed message uncompressed with status %d, length: new %ld, old %d.\n", + ret, (long) iLenDefBuf, (int) (lenMsg-1)); + /* Now check if the uncompression worked. If not, there is not much we can do. In + * that case, we log an error message but ignore the message itself. Storing the + * compressed text is dangerous, as it contains control characters. So we do + * not do this. If someone would like to have a copy, this code here could be + * modified to do a hex-dump of the buffer in question. We do not include + * this functionality right now. + * rgerhards, 2006-12-07 + */ + if(ret != Z_OK) { + errmsg.LogError(0, NO_ERRCODE, "Uncompression of a message failed with return code %d " + "- enable debug logging if you need further information. " + "Message ignored.", ret); + FINALIZE; /* unconditional exit, nothing left to do... */ + } + free(pMsg->pszRawMsg); + pMsg->pszRawMsg = deflateBuf; + pMsg->iLenRawMsg = iLenDefBuf; + deflateBuf = NULL; /* logically "freed" - caller is now responsible */ + } +finalize_it: + if(deflateBuf != NULL) + free(deflateBuf); + +# else /* ifdef USE_NETZIP */ + + /* in this case, we still need to check if the message is compressed. If so, we must + * tell the user we can not accept it. + */ + if(len > 0 && *msg == 'z') { + errmsg.LogError(0, NO_ERRCODE, "Received a compressed message, but rsyslogd does not have compression " + "support enabled. The message will be ignored."); + ABORT_FINALIZE(RS_RET_NO_ZIP); + } + +# endif /* ifdef USE_NETZIP */ + + RETiRet; +} + + +/* sanitize a received message + * if a message gets to large during sanitization, it is truncated. This is + * as specified in the upcoming syslog RFC series. + * rgerhards, 2008-10-09 + * We check if we have a NUL character at the very end of the + * message. This seems to be a frequent problem with a number of senders. + * So I have now decided to drop these NULs. However, if they are intentional, + * that may cause us some problems, e.g. with syslog-sign. On the other hand, + * current code always has problems with intentional NULs (as it needs to escape + * them to prevent problems with the C string libraries), so that does not + * really matter. Just to be on the save side, we'll log destruction of such + * NULs in the debug log. + * rgerhards, 2007-09-14 + */ +static inline rsRetVal +sanitizeMessage(msg_t *pMsg) +{ + DEFiRet; + uchar *pszMsg; + uchar *pDst; /* destination for copy job */ + size_t lenMsg; + size_t iSrc; + size_t iDst; + size_t iMaxLine; + + assert(pMsg != NULL); + +# ifdef USE_NETZIP + CHKiRet(uncompressMessage(pMsg)); +# endif + + pszMsg = pMsg->pszRawMsg; + lenMsg = pMsg->iLenRawMsg; + + /* remove NUL character at end of message (see comment in function header) */ + if(pszMsg[lenMsg-1] == '\0') { + DBGPRINTF("dropped NUL at very end of message\n"); + lenMsg--; + } + + /* then we check if we need to drop trailing LFs, which often make + * their way into syslog messages unintentionally. In order to remain + * compatible to recent IETF developments, we allow the user to + * turn on/off this handling. rgerhards, 2007-07-23 + */ + if(bDropTrailingLF && pszMsg[lenMsg-1] == '\n') { + DBGPRINTF("dropped LF at very end of message (DropTrailingLF is set)\n"); + lenMsg--; + } + + /* now copy over the message and sanitize it */ + /* TODO: can we get cheaper memory alloc? {alloca()?}*/ + iMaxLine = glbl.GetMaxLine(); + CHKmalloc(pDst = malloc(sizeof(uchar) * (iMaxLine + 1))); + iSrc = iDst = 0; + while(iSrc < lenMsg && iDst < iMaxLine) { + if(pszMsg[iSrc] == '\0') { /* guard against \0 characters... */ + /* changed to the sequence (somewhat) proposed in + * draft-ietf-syslog-protocol-19. rgerhards, 2006-11-30 + */ + if(iDst + 3 < iMaxLine) { /* do we have space? */ + pDst[iDst++] = cCCEscapeChar; + pDst[iDst++] = '0'; + pDst[iDst++] = '0'; + pDst[iDst++] = '0'; + } /* if we do not have space, we simply ignore the '\0'... */ + /* log an error? Very questionable... rgerhards, 2006-11-30 */ + /* decided: we do not log an error, it won't help... rger, 2007-06-21 */ + } else if(bEscapeCCOnRcv && iscntrl((int) pszMsg[iSrc])) { + /* we are configured to escape control characters. Please note + * that this most probably break non-western character sets like + * Japanese, Korean or Chinese. rgerhards, 2007-07-17 + * Note: sysklogd logs octal values only for DEL and CCs above 127. + * For others, it logs ^n where n is the control char converted to an + * alphabet character. We like consistency and thus escape it to octal + * in all cases. If someone complains, we may change the mode. At least + * we known now what's going on. + * rgerhards, 2007-07-17 + */ + if(iDst + 3 < iMaxLine) { /* do we have space? */ + pDst[iDst++] = cCCEscapeChar; + pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0300) >> 6); + pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0070) >> 3); + pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0007)); + } /* again, if we do not have space, we ignore the char - see comment at '\0' */ + } else { + pDst[iDst++] = pszMsg[iSrc]; + } + ++iSrc; + } + pDst[iDst] = '\0'; /* space *is* reserved for this! */ + + /* we have a sanitized string. Let's save it now */ + free(pMsg->pszRawMsg); + if((pMsg->pszRawMsg = malloc((iDst+1) * sizeof(uchar))) == NULL) { + /* when we get no new buffer, we use what we already have ;) */ + pMsg->pszRawMsg = pDst; + } else { + /* trim buffer */ + memcpy(pMsg->pszRawMsg, pDst, iDst+1); + free(pDst); /* too big! */ + pMsg->iLenRawMsg = iDst; + } + +finalize_it: + RETiRet; +} + +/* Parse a received message. The object's rawmsg property is taken and + * parsed according to the relevant standards. This can later be + * extended to support configured parsers. + * rgerhards, 2008-10-09 + */ +rsRetVal parseMsg(msg_t *pMsg) +{ + DEFiRet; + uchar *msg; + int pri; + + CHKiRet(sanitizeMessage(pMsg)); + + /* we needed to sanitize first, because we otherwise do not have a C-string we can print... */ + DBGPRINTF("msg parser: flags %x, from '%s', msg %s\n", pMsg->msgFlags, pMsg->pszRcvFrom, pMsg->pszRawMsg); + + /* pull PRI */ + pri = DEFUPRI; + msg = pMsg->pszRawMsg; + if(*msg == '<') { + pri = 0; + while(isdigit((int) *++msg)) { + pri = 10 * pri + (*msg - '0'); + } + if(*msg == '>') + ++msg; + if(pri & ~(LOG_FACMASK|LOG_PRIMASK)) + pri = DEFUPRI; + } + pMsg->iFacility = LOG_FAC(pri); + pMsg->iSeverity = LOG_PRI(pri); + MsgSetUxTradMsg(pMsg, (char*) msg); + + if(pMsg->bParseHOSTNAME == 0) + MsgSetHOSTNAME(pMsg, (char*) pMsg->pszRcvFrom); + + /* rger 2005-11-24 (happy thanksgiving!): we now need to check if we have + * a traditional syslog message or one formatted according to syslog-protocol. + * We need to apply different parsers depending on that. We use the + * -protocol VERSION field for the detection. + */ + if(msg[0] == '1' && msg[1] == ' ') { + dbgprintf("Message has syslog-protocol format.\n"); + setProtocolVersion(pMsg, 1); + if(parseRFCSyslogMsg(pMsg, pMsg->msgFlags) == 1) { // TODO: parseRFC... should pull flags from pMsg + msgDestruct(&pMsg); + ABORT_FINALIZE(RS_RET_ERR); // TODO: we need to handle these cases! + } + } else { /* we have legacy syslog */ + dbgprintf("Message has legacy syslog format.\n"); + setProtocolVersion(pMsg, 0); + if(parseLegacySyslogMsg(pMsg, pMsg->msgFlags) == 1) { + msgDestruct(&pMsg); + ABORT_FINALIZE(RS_RET_ERR); // TODO: we need to handle these cases! + } + } + + /* finalize message object */ + pMsg->bIsParsed = 1; /* this message is now parsed */ + MsgPrepareEnqueue(pMsg); /* "historical" name - preparese for multi-threading */ + +finalize_it: + RETiRet; +} -- cgit From 6c6e9a0f3f7d454ba9553a750b195d7f99c7299a Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Thu, 9 Oct 2008 13:45:56 +0200 Subject: moved bParseHostname and bIsParsed to msgFlags This enables us to use more efficient calling conventions and also helps us keep the on-disk structure of a msg object more consistent in future releases. --- runtime/parser.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 8c4272a0..fbdeebeb 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -41,7 +41,6 @@ /* some defines */ #define DEFUPRI (LOG_USER|LOG_NOTICE) -#warning "msg object must be updated with new property for persisting the queue!" /* definitions for objects we access */ DEFobjStaticHelpers DEFobjCurrIf(glbl) @@ -306,7 +305,7 @@ rsRetVal parseMsg(msg_t *pMsg) } /* finalize message object */ - pMsg->bIsParsed = 1; /* this message is now parsed */ + pMsg->msgFlags &= ~NEEDS_PARSING; /* this message is now parsed */ MsgPrepareEnqueue(pMsg); /* "historical" name - preparese for multi-threading */ finalize_it: -- cgit From 6b905b511b685f2ae28ef94d2e0ba14d1a3f4df3 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 3 Dec 2008 10:45:11 +0100 Subject: bugfix: code did not compile without zlib --- runtime/parser.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index fbdeebeb..15dfd4e0 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -127,12 +127,16 @@ finalize_it: /* in this case, we still need to check if the message is compressed. If so, we must * tell the user we can not accept it. */ - if(len > 0 && *msg == 'z') { + //pszMsg = pMsg->pszRawMsg; + //lenMsg = pMsg->iLenRawMsg; + //if(lenMsg > 0 && *msg == 'z') { + if(pMsg->iLenRawMsg > 0 && *pMsg->pszRawMsg == 'z') { errmsg.LogError(0, NO_ERRCODE, "Received a compressed message, but rsyslogd does not have compression " "support enabled. The message will be ignored."); ABORT_FINALIZE(RS_RET_NO_ZIP); } +finalize_it: # endif /* ifdef USE_NETZIP */ RETiRet; -- cgit From c5bfd2b24ca8c490401a0835ec741c05acf0ed3e Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 3 Dec 2008 10:46:27 +0100 Subject: some cleanup (forgotten...) --- runtime/parser.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index 15dfd4e0..ec2a28c7 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -127,9 +127,6 @@ finalize_it: /* in this case, we still need to check if the message is compressed. If so, we must * tell the user we can not accept it. */ - //pszMsg = pMsg->pszRawMsg; - //lenMsg = pMsg->iLenRawMsg; - //if(lenMsg > 0 && *msg == 'z') { if(pMsg->iLenRawMsg > 0 && *pMsg->pszRawMsg == 'z') { errmsg.LogError(0, NO_ERRCODE, "Received a compressed message, but rsyslogd does not have compression " "support enabled. The message will be ignored."); @@ -295,7 +292,7 @@ rsRetVal parseMsg(msg_t *pMsg) if(msg[0] == '1' && msg[1] == ' ') { dbgprintf("Message has syslog-protocol format.\n"); setProtocolVersion(pMsg, 1); - if(parseRFCSyslogMsg(pMsg, pMsg->msgFlags) == 1) { // TODO: parseRFC... should pull flags from pMsg + if(parseRFCSyslogMsg(pMsg, pMsg->msgFlags) == 1) { msgDestruct(&pMsg); ABORT_FINALIZE(RS_RET_ERR); // TODO: we need to handle these cases! } -- cgit From 3e1220f434533b5e91de51f5de17cc76eaa8af45 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Fri, 5 Dec 2008 01:10:06 +0100 Subject: fixed some compiler warnings --- runtime/parser.c | 1 + 1 file changed, 1 insertion(+) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index ec2a28c7..b549cd19 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -27,6 +27,7 @@ #include "config.h" #include #include +#include #include #ifdef USE_NETZIP #include -- cgit From 60b8ce14bf33e76237cf82dd1f68acc750e64316 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Mon, 8 Dec 2008 15:42:47 +0100 Subject: added $PreserveFQDN config file directive Enables to use FQDNs in sender names where the legacy default --- runtime/parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'runtime/parser.c') diff --git a/runtime/parser.c b/runtime/parser.c index b549cd19..b4ab0a3e 100644 --- a/runtime/parser.c +++ b/runtime/parser.c @@ -263,7 +263,7 @@ rsRetVal parseMsg(msg_t *pMsg) CHKiRet(sanitizeMessage(pMsg)); /* we needed to sanitize first, because we otherwise do not have a C-string we can print... */ - DBGPRINTF("msg parser: flags %x, from '%s', msg %s\n", pMsg->msgFlags, pMsg->pszRcvFrom, pMsg->pszRawMsg); + DBGPRINTF("msg parser: flags %x, from '%s', msg '%s'\n", pMsg->msgFlags, pMsg->pszRcvFrom, pMsg->pszRawMsg); /* pull PRI */ pri = DEFUPRI; -- cgit