diff options
Diffstat (limited to 'runtime/parser.c')
-rw-r--r-- | runtime/parser.c | 313 |
1 files changed, 313 insertions, 0 deletions
diff --git a/runtime/parser.c b/runtime/parser.c new file mode 100644 index 00000000..fbdeebeb --- /dev/null +++ b/runtime/parser.c @@ -0,0 +1,313 @@ +/* parser.c + * This module contains functions for message parsers. It still needs to be + * converted into an object (and much extended). + * + * Module begun 2008-10-09 by Rainer Gerhards (based on previous code from syslogd.c) + * + * Copyright 2008 Rainer Gerhards and Adiscon GmbH. + * + * This file is part of the rsyslog runtime library. + * + * The rsyslog runtime library is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * The rsyslog runtime library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with the rsyslog runtime library. If not, see <http://www.gnu.org/licenses/>. + * + * A copy of the GPL can be found in the file "COPYING" in this distribution. + * A copy of the LGPL can be found in the file "COPYING.LESSER" in this distribution. + */ +#include "config.h" +#include <stdlib.h> +#include <ctype.h> +#include <assert.h> +#ifdef USE_NETZIP +#include <zlib.h> +#endif + +#include "rsyslog.h" +#include "dirty.h" +#include "msg.h" +#include "obj.h" +#include "errmsg.h" + +/* some defines */ +#define DEFUPRI (LOG_USER|LOG_NOTICE) + +/* definitions for objects we access */ +DEFobjStaticHelpers +DEFobjCurrIf(glbl) +DEFobjCurrIf(errmsg) + +/* static data */ + + +/* this is a dummy class init + */ +rsRetVal parserClassInit(void) +{ + DEFiRet; + + /* request objects we use */ + CHKiRet(objGetObjInterface(&obj)); /* this provides the root pointer for all other queries */ + CHKiRet(objUse(glbl, CORE_COMPONENT)); + CHKiRet(objUse(errmsg, CORE_COMPONENT)); +// TODO: free components! see action.c +finalize_it: + RETiRet; +} + + +/* uncompress a received message if it is compressed. + * pMsg->pszRawMsg buffer is updated. + * rgerhards, 2008-10-09 + */ +static inline rsRetVal uncompressMessage(msg_t *pMsg) +{ + DEFiRet; +# ifdef USE_NETZIP + uchar *deflateBuf = NULL; + uLongf iLenDefBuf; + uchar *pszMsg; + size_t lenMsg; + + assert(pMsg != NULL); + pszMsg = pMsg->pszRawMsg; + lenMsg = pMsg->iLenRawMsg; + + /* we first need to check if we have a compressed record. If so, + * we must decompress it. + */ + if(lenMsg > 0 && *pszMsg == 'z') { /* compressed data present? (do NOT change order if conditions!) */ + /* we have compressed data, so let's deflate it. We support a maximum + * message size of iMaxLine. If it is larger, an error message is logged + * and the message is dropped. We do NOT try to decompress larger messages + * as such might be used for denial of service. It might happen to later + * builds that such functionality be added as an optional, operator-configurable + * feature. + */ + int ret; + iLenDefBuf = glbl.GetMaxLine(); + CHKmalloc(deflateBuf = malloc(sizeof(uchar) * (iLenDefBuf + 1))); + ret = uncompress((uchar *) deflateBuf, &iLenDefBuf, (uchar *) pszMsg+1, lenMsg-1); + DBGPRINTF("Compressed message uncompressed with status %d, length: new %ld, old %d.\n", + ret, (long) iLenDefBuf, (int) (lenMsg-1)); + /* Now check if the uncompression worked. If not, there is not much we can do. In + * that case, we log an error message but ignore the message itself. Storing the + * compressed text is dangerous, as it contains control characters. So we do + * not do this. If someone would like to have a copy, this code here could be + * modified to do a hex-dump of the buffer in question. We do not include + * this functionality right now. + * rgerhards, 2006-12-07 + */ + if(ret != Z_OK) { + errmsg.LogError(0, NO_ERRCODE, "Uncompression of a message failed with return code %d " + "- enable debug logging if you need further information. " + "Message ignored.", ret); + FINALIZE; /* unconditional exit, nothing left to do... */ + } + free(pMsg->pszRawMsg); + pMsg->pszRawMsg = deflateBuf; + pMsg->iLenRawMsg = iLenDefBuf; + deflateBuf = NULL; /* logically "freed" - caller is now responsible */ + } +finalize_it: + if(deflateBuf != NULL) + free(deflateBuf); + +# else /* ifdef USE_NETZIP */ + + /* in this case, we still need to check if the message is compressed. If so, we must + * tell the user we can not accept it. + */ + if(len > 0 && *msg == 'z') { + errmsg.LogError(0, NO_ERRCODE, "Received a compressed message, but rsyslogd does not have compression " + "support enabled. The message will be ignored."); + ABORT_FINALIZE(RS_RET_NO_ZIP); + } + +# endif /* ifdef USE_NETZIP */ + + RETiRet; +} + + +/* sanitize a received message + * if a message gets to large during sanitization, it is truncated. This is + * as specified in the upcoming syslog RFC series. + * rgerhards, 2008-10-09 + * We check if we have a NUL character at the very end of the + * message. This seems to be a frequent problem with a number of senders. + * So I have now decided to drop these NULs. However, if they are intentional, + * that may cause us some problems, e.g. with syslog-sign. On the other hand, + * current code always has problems with intentional NULs (as it needs to escape + * them to prevent problems with the C string libraries), so that does not + * really matter. Just to be on the save side, we'll log destruction of such + * NULs in the debug log. + * rgerhards, 2007-09-14 + */ +static inline rsRetVal +sanitizeMessage(msg_t *pMsg) +{ + DEFiRet; + uchar *pszMsg; + uchar *pDst; /* destination for copy job */ + size_t lenMsg; + size_t iSrc; + size_t iDst; + size_t iMaxLine; + + assert(pMsg != NULL); + +# ifdef USE_NETZIP + CHKiRet(uncompressMessage(pMsg)); +# endif + + pszMsg = pMsg->pszRawMsg; + lenMsg = pMsg->iLenRawMsg; + + /* remove NUL character at end of message (see comment in function header) */ + if(pszMsg[lenMsg-1] == '\0') { + DBGPRINTF("dropped NUL at very end of message\n"); + lenMsg--; + } + + /* then we check if we need to drop trailing LFs, which often make + * their way into syslog messages unintentionally. In order to remain + * compatible to recent IETF developments, we allow the user to + * turn on/off this handling. rgerhards, 2007-07-23 + */ + if(bDropTrailingLF && pszMsg[lenMsg-1] == '\n') { + DBGPRINTF("dropped LF at very end of message (DropTrailingLF is set)\n"); + lenMsg--; + } + + /* now copy over the message and sanitize it */ + /* TODO: can we get cheaper memory alloc? {alloca()?}*/ + iMaxLine = glbl.GetMaxLine(); + CHKmalloc(pDst = malloc(sizeof(uchar) * (iMaxLine + 1))); + iSrc = iDst = 0; + while(iSrc < lenMsg && iDst < iMaxLine) { + if(pszMsg[iSrc] == '\0') { /* guard against \0 characters... */ + /* changed to the sequence (somewhat) proposed in + * draft-ietf-syslog-protocol-19. rgerhards, 2006-11-30 + */ + if(iDst + 3 < iMaxLine) { /* do we have space? */ + pDst[iDst++] = cCCEscapeChar; + pDst[iDst++] = '0'; + pDst[iDst++] = '0'; + pDst[iDst++] = '0'; + } /* if we do not have space, we simply ignore the '\0'... */ + /* log an error? Very questionable... rgerhards, 2006-11-30 */ + /* decided: we do not log an error, it won't help... rger, 2007-06-21 */ + } else if(bEscapeCCOnRcv && iscntrl((int) pszMsg[iSrc])) { + /* we are configured to escape control characters. Please note + * that this most probably break non-western character sets like + * Japanese, Korean or Chinese. rgerhards, 2007-07-17 + * Note: sysklogd logs octal values only for DEL and CCs above 127. + * For others, it logs ^n where n is the control char converted to an + * alphabet character. We like consistency and thus escape it to octal + * in all cases. If someone complains, we may change the mode. At least + * we known now what's going on. + * rgerhards, 2007-07-17 + */ + if(iDst + 3 < iMaxLine) { /* do we have space? */ + pDst[iDst++] = cCCEscapeChar; + pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0300) >> 6); + pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0070) >> 3); + pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0007)); + } /* again, if we do not have space, we ignore the char - see comment at '\0' */ + } else { + pDst[iDst++] = pszMsg[iSrc]; + } + ++iSrc; + } + pDst[iDst] = '\0'; /* space *is* reserved for this! */ + + /* we have a sanitized string. Let's save it now */ + free(pMsg->pszRawMsg); + if((pMsg->pszRawMsg = malloc((iDst+1) * sizeof(uchar))) == NULL) { + /* when we get no new buffer, we use what we already have ;) */ + pMsg->pszRawMsg = pDst; + } else { + /* trim buffer */ + memcpy(pMsg->pszRawMsg, pDst, iDst+1); + free(pDst); /* too big! */ + pMsg->iLenRawMsg = iDst; + } + +finalize_it: + RETiRet; +} + +/* Parse a received message. The object's rawmsg property is taken and + * parsed according to the relevant standards. This can later be + * extended to support configured parsers. + * rgerhards, 2008-10-09 + */ +rsRetVal parseMsg(msg_t *pMsg) +{ + DEFiRet; + uchar *msg; + int pri; + + CHKiRet(sanitizeMessage(pMsg)); + + /* we needed to sanitize first, because we otherwise do not have a C-string we can print... */ + DBGPRINTF("msg parser: flags %x, from '%s', msg %s\n", pMsg->msgFlags, pMsg->pszRcvFrom, pMsg->pszRawMsg); + + /* pull PRI */ + pri = DEFUPRI; + msg = pMsg->pszRawMsg; + if(*msg == '<') { + pri = 0; + while(isdigit((int) *++msg)) { + pri = 10 * pri + (*msg - '0'); + } + if(*msg == '>') + ++msg; + if(pri & ~(LOG_FACMASK|LOG_PRIMASK)) + pri = DEFUPRI; + } + pMsg->iFacility = LOG_FAC(pri); + pMsg->iSeverity = LOG_PRI(pri); + MsgSetUxTradMsg(pMsg, (char*) msg); + + if(pMsg->bParseHOSTNAME == 0) + MsgSetHOSTNAME(pMsg, (char*) pMsg->pszRcvFrom); + + /* rger 2005-11-24 (happy thanksgiving!): we now need to check if we have + * a traditional syslog message or one formatted according to syslog-protocol. + * We need to apply different parsers depending on that. We use the + * -protocol VERSION field for the detection. + */ + if(msg[0] == '1' && msg[1] == ' ') { + dbgprintf("Message has syslog-protocol format.\n"); + setProtocolVersion(pMsg, 1); + if(parseRFCSyslogMsg(pMsg, pMsg->msgFlags) == 1) { // TODO: parseRFC... should pull flags from pMsg + msgDestruct(&pMsg); + ABORT_FINALIZE(RS_RET_ERR); // TODO: we need to handle these cases! + } + } else { /* we have legacy syslog */ + dbgprintf("Message has legacy syslog format.\n"); + setProtocolVersion(pMsg, 0); + if(parseLegacySyslogMsg(pMsg, pMsg->msgFlags) == 1) { + msgDestruct(&pMsg); + ABORT_FINALIZE(RS_RET_ERR); // TODO: we need to handle these cases! + } + } + + /* finalize message object */ + pMsg->msgFlags &= ~NEEDS_PARSING; /* this message is now parsed */ + MsgPrepareEnqueue(pMsg); /* "historical" name - preparese for multi-threading */ + +finalize_it: + RETiRet; +} |