/* parser.c
* This module contains functions for message parsers. It still needs to be
* converted into an object (and much extended).
*
* Module begun 2008-10-09 by Rainer Gerhards (based on previous code from syslogd.c)
*
* Copyright 2008 Rainer Gerhards and Adiscon GmbH.
*
* This file is part of the rsyslog runtime library.
*
* The rsyslog runtime library is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* The rsyslog runtime library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with the rsyslog runtime library. If not, see .
*
* A copy of the GPL can be found in the file "COPYING" in this distribution.
* A copy of the LGPL can be found in the file "COPYING.LESSER" in this distribution.
*/
#include "config.h"
#include
#include
#include
#include
#ifdef USE_NETZIP
#include
#endif
#include "rsyslog.h"
#include "dirty.h"
#include "msg.h"
#include "obj.h"
#include "errmsg.h"
/* some defines */
#define DEFUPRI (LOG_USER|LOG_NOTICE)
/* definitions for objects we access */
DEFobjStaticHelpers
DEFobjCurrIf(glbl)
DEFobjCurrIf(errmsg)
/* static data */
/* this is a dummy class init
*/
rsRetVal parserClassInit(void)
{
DEFiRet;
/* request objects we use */
CHKiRet(objGetObjInterface(&obj)); /* this provides the root pointer for all other queries */
CHKiRet(objUse(glbl, CORE_COMPONENT));
CHKiRet(objUse(errmsg, CORE_COMPONENT));
// TODO: free components! see action.c
finalize_it:
RETiRet;
}
/* uncompress a received message if it is compressed.
* pMsg->pszRawMsg buffer is updated.
* rgerhards, 2008-10-09
*/
static inline rsRetVal uncompressMessage(msg_t *pMsg)
{
DEFiRet;
# ifdef USE_NETZIP
uchar *deflateBuf = NULL;
uLongf iLenDefBuf;
uchar *pszMsg;
size_t lenMsg;
assert(pMsg != NULL);
pszMsg = pMsg->pszRawMsg;
lenMsg = pMsg->iLenRawMsg;
/* we first need to check if we have a compressed record. If so,
* we must decompress it.
*/
if(lenMsg > 0 && *pszMsg == 'z') { /* compressed data present? (do NOT change order if conditions!) */
/* we have compressed data, so let's deflate it. We support a maximum
* message size of iMaxLine. If it is larger, an error message is logged
* and the message is dropped. We do NOT try to decompress larger messages
* as such might be used for denial of service. It might happen to later
* builds that such functionality be added as an optional, operator-configurable
* feature.
*/
int ret;
iLenDefBuf = glbl.GetMaxLine();
CHKmalloc(deflateBuf = malloc(sizeof(uchar) * (iLenDefBuf + 1)));
ret = uncompress((uchar *) deflateBuf, &iLenDefBuf, (uchar *) pszMsg+1, lenMsg-1);
DBGPRINTF("Compressed message uncompressed with status %d, length: new %ld, old %d.\n",
ret, (long) iLenDefBuf, (int) (lenMsg-1));
/* Now check if the uncompression worked. If not, there is not much we can do. In
* that case, we log an error message but ignore the message itself. Storing the
* compressed text is dangerous, as it contains control characters. So we do
* not do this. If someone would like to have a copy, this code here could be
* modified to do a hex-dump of the buffer in question. We do not include
* this functionality right now.
* rgerhards, 2006-12-07
*/
if(ret != Z_OK) {
errmsg.LogError(0, NO_ERRCODE, "Uncompression of a message failed with return code %d "
"- enable debug logging if you need further information. "
"Message ignored.", ret);
FINALIZE; /* unconditional exit, nothing left to do... */
}
MsgSetRawMsg(pMsg, (char*)deflateBuf, iLenDefBuf);
free(deflateBuf);
}
finalize_it:
if(deflateBuf != NULL)
free(deflateBuf);
# else /* ifdef USE_NETZIP */
/* in this case, we still need to check if the message is compressed. If so, we must
* tell the user we can not accept it.
*/
if(pMsg->iLenRawMsg > 0 && *pMsg->pszRawMsg == 'z') {
errmsg.LogError(0, NO_ERRCODE, "Received a compressed message, but rsyslogd does not have compression "
"support enabled. The message will be ignored.");
ABORT_FINALIZE(RS_RET_NO_ZIP);
}
finalize_it:
# endif /* ifdef USE_NETZIP */
RETiRet;
}
/* sanitize a received message
* if a message gets to large during sanitization, it is truncated. This is
* as specified in the upcoming syslog RFC series.
* rgerhards, 2008-10-09
* We check if we have a NUL character at the very end of the
* message. This seems to be a frequent problem with a number of senders.
* So I have now decided to drop these NULs. However, if they are intentional,
* that may cause us some problems, e.g. with syslog-sign. On the other hand,
* current code always has problems with intentional NULs (as it needs to escape
* them to prevent problems with the C string libraries), so that does not
* really matter. Just to be on the save side, we'll log destruction of such
* NULs in the debug log.
* rgerhards, 2007-09-14
*/
static inline rsRetVal
sanitizeMessage(msg_t *pMsg)
{
DEFiRet;
uchar *pszMsg;
uchar *pDst; /* destination for copy job */
size_t lenMsg;
size_t iSrc;
size_t iDst;
size_t iMaxLine;
size_t maxDest;
uchar szSanBuf[32*1024]; /* buffer used for sanitizing a string */
assert(pMsg != NULL);
# ifdef USE_NETZIP
CHKiRet(uncompressMessage(pMsg));
# endif
pszMsg = pMsg->pszRawMsg;
lenMsg = pMsg->iLenRawMsg;
/* remove NUL character at end of message (see comment in function header) */
if(pszMsg[lenMsg-1] == '\0') {
DBGPRINTF("dropped NUL at very end of message\n");
lenMsg--;
}
/* then we check if we need to drop trailing LFs, which often make
* their way into syslog messages unintentionally. In order to remain
* compatible to recent IETF developments, we allow the user to
* turn on/off this handling. rgerhards, 2007-07-23
*/
if(bDropTrailingLF && pszMsg[lenMsg-1] == '\n') {
DBGPRINTF("dropped LF at very end of message (DropTrailingLF is set)\n");
lenMsg--;
}
/* it is much quicker to sweep over the message and see if it actually
* needs sanitation than to do the sanitation in any case. So we first do
* this and terminate when it is not needed - which is expectedly the case
* for the vast majority of messages. -- rgerhards, 2009-06-15
*/
int bNeedSanitize = 0;
for(iSrc = 0 ; iSrc < lenMsg ; iSrc++) {
if(pszMsg[iSrc] < 32) {
if(pszMsg[iSrc] == '\0' || bEscapeCCOnRcv) {
bNeedSanitize = 1;
break;
}
}
}
if(!bNeedSanitize)
FINALIZE;
/* now copy over the message and sanitize it */
iMaxLine = glbl.GetMaxLine();
maxDest = lenMsg * 4; /* message can grow at most four-fold */
if(maxDest > iMaxLine)
maxDest = iMaxLine; /* but not more than the max size! */
if(maxDest < sizeof(szSanBuf))
pDst = szSanBuf;
else
CHKmalloc(pDst = malloc(sizeof(uchar) * (iMaxLine + 1)));
iSrc = iDst = 0;
while(iSrc < lenMsg && iDst < maxDest - 3) { /* leave some space if last char must be escaped */
if(pszMsg[iSrc] == '\0') { /* guard against \0 characters... */
} else if(iscntrl((int) pszMsg[iSrc])) {
if(pszMsg[iSrc] == '\0' || bEscapeCCOnRcv) {
/* we are configured to escape control characters. Please note
* that this most probably break non-western character sets like
* Japanese, Korean or Chinese. rgerhards, 2007-07-17
*/
pDst[iDst++] = cCCEscapeChar;
pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0300) >> 6);
pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0070) >> 3);
pDst[iDst++] = '0' + ((pszMsg[iSrc] & 0007));
}
} else {
pDst[iDst++] = pszMsg[iSrc];
}
++iSrc;
}
MsgSetRawMsg(pMsg, (char*)pDst, iDst); /* save sanitized string */
if(pDst != szSanBuf)
free(pDst);
finalize_it:
RETiRet;
}
/* Parse a received message. The object's rawmsg property is taken and
* parsed according to the relevant standards. This can later be
* extended to support configured parsers.
* rgerhards, 2008-10-09
*/
rsRetVal parseMsg(msg_t *pMsg)
{
DEFiRet;
uchar *msg;
int pri;
CHKiRet(sanitizeMessage(pMsg));
/* we needed to sanitize first, because we otherwise do not have a C-string we can print... */
DBGPRINTF("msg parser: flags %x, from '%s', msg '%s'\n", pMsg->msgFlags, pMsg->pszRcvFrom, pMsg->pszRawMsg);
/* pull PRI */
pri = DEFUPRI;
msg = pMsg->pszRawMsg;
if(*msg == '<') {
/* while we process the PRI, we also fill the PRI textual representation
* inside the msg object. This may not be ideal from an OOP point of view,
* but it offers us performance...
*/
pri = 0;
while(isdigit((int) *++msg)) {
pri = 10 * pri + (*msg - '0');
}
if(*msg == '>')
++msg;
if(pri & ~(LOG_FACMASK|LOG_PRIMASK))
pri = DEFUPRI;
}
pMsg->iFacility = LOG_FAC(pri);
pMsg->iSeverity = LOG_PRI(pri);
MsgSetAfterPRIOffs(pMsg, msg - pMsg->pszRawMsg);
if(pMsg->bParseHOSTNAME == 0)
MsgSetHOSTNAME(pMsg, pMsg->pszRcvFrom);
/* rger 2005-11-24 (happy thanksgiving!): we now need to check if we have
* a traditional syslog message or one formatted according to syslog-protocol.
* We need to apply different parsers depending on that. We use the
* -protocol VERSION field for the detection.
*/
if(msg[0] == '1' && msg[1] == ' ') {
dbgprintf("Message has syslog-protocol format.\n");
setProtocolVersion(pMsg, 1);
if(parseRFCSyslogMsg(pMsg, pMsg->msgFlags) == 1) {
msgDestruct(&pMsg);
ABORT_FINALIZE(RS_RET_ERR); // TODO: we need to handle these cases!
}
} else { /* we have legacy syslog */
dbgprintf("Message has legacy syslog format.\n");
setProtocolVersion(pMsg, 0);
if(parseLegacySyslogMsg(pMsg, pMsg->msgFlags) == 1) {
msgDestruct(&pMsg);
ABORT_FINALIZE(RS_RET_ERR); // TODO: we need to handle these cases!
}
}
/* finalize message object */
pMsg->msgFlags &= ~NEEDS_PARSING; /* this message is now parsed */
MsgPrepareEnqueue(pMsg); /* "historical" name - preparese for multi-threading */
finalize_it:
RETiRet;
}