/* pmrfc5424.c * This is a parser module for RFC5424-formatted messages. * * NOTE: read comments in module-template.h to understand how this file * works! * * File begun on 2009-11-03 by RGerhards * * Copyright 2007, 2009 Rainer Gerhards and Adiscon GmbH. * * This file is part of rsyslog. * * Rsyslog is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Rsyslog is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Rsyslog. If not, see . * * A copy of the GPL can be found in the file "COPYING" in this distribution. */ #include "config.h" #include "rsyslog.h" #include #include #include #include #include "syslogd.h" #include "conf.h" #include "syslogd-types.h" #include "template.h" #include "msg.h" #include "module-template.h" #include "glbl.h" #include "errmsg.h" #include "parser.h" #include "datetime.h" #include "unicode-helper.h" MODULE_TYPE_PARSER MODULE_TYPE_NOKEEP PARSER_NAME("rsyslog.rfc5424") /* internal structures */ DEF_PMOD_STATIC_DATA DEFobjCurrIf(errmsg) DEFobjCurrIf(glbl) DEFobjCurrIf(parser) DEFobjCurrIf(datetime) /* config data */ BEGINisCompatibleWithFeature CODESTARTisCompatibleWithFeature if(eFeat == sFEATUREAutomaticSanitazion) iRet = RS_RET_OK; if(eFeat == sFEATUREAutomaticPRIParsing) iRet = RS_RET_OK; ENDisCompatibleWithFeature /* Helper to parseRFCSyslogMsg. This function parses a field up to * (and including) the SP character after it. The field contents is * returned in a caller-provided buffer. The parsepointer is advanced * to after the terminating SP. The caller must ensure that the * provided buffer is large enough to hold the to be extracted value. * Returns 0 if everything is fine or 1 if either the field is not * SP-terminated or any other error occurs. -- rger, 2005-11-24 * The function now receives the size of the string and makes sure * that it does not process more than that. The *pLenStr counter is * updated on exit. -- rgerhards, 2009-09-23 */ static int parseRFCField(uchar **pp2parse, uchar *pResult, int *pLenStr) { uchar *p2parse; int iRet = 0; assert(pp2parse != NULL); assert(*pp2parse != NULL); assert(pResult != NULL); p2parse = *pp2parse; /* this is the actual parsing loop */ while(*pLenStr > 0 && *p2parse != ' ') { *pResult++ = *p2parse++; --(*pLenStr); } if(*pLenStr > 0 && *p2parse == ' ') { ++p2parse; /* eat SP, but only if not at end of string */ --(*pLenStr); } else { iRet = 1; /* there MUST be an SP! */ } *pResult = '\0'; /* set the new parse pointer */ *pp2parse = p2parse; return iRet; } /* Helper to parseRFCSyslogMsg. This function parses the structured * data field of a message. It does NOT parse inside structured data, * just gets the field as whole. Parsing the single entities is left * to other functions. The parsepointer is advanced * to after the terminating SP. The caller must ensure that the * provided buffer is large enough to hold the to be extracted value. * Returns 0 if everything is fine or 1 if either the field is not * SP-terminated or any other error occurs. -- rger, 2005-11-24 * The function now receives the size of the string and makes sure * that it does not process more than that. The *pLenStr counter is * updated on exit. -- rgerhards, 2009-09-23 */ static int parseRFCStructuredData(uchar **pp2parse, uchar *pResult, int *pLenStr) { uchar *p2parse; int bCont = 1; int iRet = 0; int lenStr; assert(pp2parse != NULL); assert(*pp2parse != NULL); assert(pResult != NULL); p2parse = *pp2parse; lenStr = *pLenStr; /* this is the actual parsing loop * Remeber: structured data starts with [ and includes any characters * until the first ] followed by a SP. There may be spaces inside * structured data. There may also be \] inside the structured data, which * do NOT terminate an element. */ if(lenStr == 0 || (*p2parse != '[' && *p2parse != '-')) return 1; /* this is NOT structured data! */ if(*p2parse == '-') { /* empty structured data? */ *pResult++ = '-'; ++p2parse; --lenStr; } else { while(bCont) { if(lenStr < 2) { /* we now need to check if we have only structured data */ if(lenStr > 0 && *p2parse == ']') { *pResult++ = *p2parse; p2parse++; lenStr--; bCont = 0; } else { iRet = 1; /* this is not valid! */ bCont = 0; } } else if(*p2parse == '\\' && *(p2parse+1) == ']') { /* this is escaped, need to copy both */ *pResult++ = *p2parse++; *pResult++ = *p2parse++; lenStr -= 2; } else if(*p2parse == ']' && *(p2parse+1) == ' ') { /* found end, just need to copy the ] and eat the SP */ *pResult++ = *p2parse; p2parse += 2; lenStr -= 2; bCont = 0; } else { *pResult++ = *p2parse++; --lenStr; } } } if(lenStr > 0 && *p2parse == ' ') { ++p2parse; /* eat SP, but only if not at end of string */ --lenStr; } else { iRet = 1; /* there MUST be an SP! */ } *pResult = '\0'; /* set the new parse pointer */ *pp2parse = p2parse; *pLenStr = lenStr; return iRet; } /* parse a RFC5424-formatted syslog message. This function returns * 0 if processing of the message shall continue and 1 if something * went wrong and this messe should be ignored. This function has been * implemented in the effort to support syslog-protocol. Please note that * the name (parse *RFC*) stems from the hope that syslog-protocol will * some time become an RFC. Do not confuse this with informational * RFC 3164 (which is legacy syslog). * * currently supported format: * * VERSION SP TIMESTAMP SP HOSTNAME SP APP-NAME SP PROCID SP MSGID SP [SD-ID]s SP MSG * * is already stripped when this function is entered. VERSION already * has been confirmed to be "1", but has NOT been stripped from the message. * * rger, 2005-11-24 */ BEGINparse uchar *p2parse; uchar *pBuf = NULL; int lenMsg; int bContParse = 1; CODESTARTparse assert(pMsg != NULL); assert(pMsg->pszRawMsg != NULL); p2parse = pMsg->pszRawMsg + pMsg->offAfterPRI; /* point to start of text, after PRI */ lenMsg = pMsg->iLenRawMsg - pMsg->offAfterPRI; /* check if we are the right parser */ if(lenMsg < 2 || p2parse[0] != '1' || p2parse[1] != ' ') { ABORT_FINALIZE(RS_RET_COULD_NOT_PARSE); } DBGPRINTF("Message has RFC5424/syslog-protocol format.\n"); setProtocolVersion(pMsg, 1); p2parse += 2; lenMsg -= 2; /* Now get us some memory we can use as a work buffer while parsing. * We simply allocated a buffer sufficiently large to hold all of the * message, so we can not run into any troubles. I think this is * wiser than to use individual buffers. */ CHKmalloc(pBuf = MALLOC(sizeof(uchar) * (lenMsg + 1))); /* IMPORTANT NOTE: * Validation is not actually done below nor are any errors handled. I have * NOT included this for the current proof of concept. However, it is strongly * advisable to add it when this code actually goes into production. * rgerhards, 2005-11-24 */ /* TIMESTAMP */ if(datetime.ParseTIMESTAMP3339(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) { if(pMsg->msgFlags & IGNDATE) { /* we need to ignore the msg data, so simply copy over reception date */ memcpy(&pMsg->tTIMESTAMP, &pMsg->tRcvdAt, sizeof(struct syslogTime)); } } else { DBGPRINTF("no TIMESTAMP detected!\n"); bContParse = 0; } /* HOSTNAME */ if(bContParse) { parseRFCField(&p2parse, pBuf, &lenMsg); MsgSetHOSTNAME(pMsg, pBuf, ustrlen(pBuf)); } /* APP-NAME */ if(bContParse) { parseRFCField(&p2parse, pBuf, &lenMsg); MsgSetAPPNAME(pMsg, (char*)pBuf); } /* PROCID */ if(bContParse) { parseRFCField(&p2parse, pBuf, &lenMsg); MsgSetPROCID(pMsg, (char*)pBuf); } /* MSGID */ if(bContParse) { parseRFCField(&p2parse, pBuf, &lenMsg); MsgSetMSGID(pMsg, (char*)pBuf); } /* STRUCTURED-DATA */ if(bContParse) { parseRFCStructuredData(&p2parse, pBuf, &lenMsg); MsgSetStructuredData(pMsg, (char*)pBuf); } /* MSG */ MsgSetMSGoffs(pMsg, p2parse - pMsg->pszRawMsg); finalize_it: if(pBuf != NULL) free(pBuf); ENDparse BEGINmodExit CODESTARTmodExit /* release what we no longer need */ objRelease(errmsg, CORE_COMPONENT); objRelease(glbl, CORE_COMPONENT); objRelease(parser, CORE_COMPONENT); objRelease(datetime, CORE_COMPONENT); ENDmodExit BEGINqueryEtryPt CODESTARTqueryEtryPt CODEqueryEtryPt_STD_PMOD_QUERIES CODEqueryEtryPt_IsCompatibleWithFeature_IF_OMOD_QUERIES ENDqueryEtryPt BEGINmodInit(pmrfc5424) CODESTARTmodInit *ipIFVersProvided = CURR_MOD_IF_VERSION; /* we only support the current interface specification */ CODEmodInit_QueryRegCFSLineHdlr CHKiRet(objUse(glbl, CORE_COMPONENT)); CHKiRet(objUse(errmsg, CORE_COMPONENT)); CHKiRet(objUse(parser, CORE_COMPONENT)); CHKiRet(objUse(datetime, CORE_COMPONENT)); dbgprintf("rfc5424 parser init called\n"); dbgprintf("GetParserName addr %p\n", GetParserName); ENDmodInit /* vim:set ai: */