summaryrefslogtreecommitdiffstats
path: root/tools/pmrfc5424.c
diff options
context:
space:
mode:
authorRainer Gerhards <rgerhards@adiscon.com>2009-11-04 10:40:27 +0100
committerRainer Gerhards <rgerhards@adiscon.com>2009-11-04 10:40:27 +0100
commit1b7f5c54684db29c096e09238648a45dce78ebee (patch)
tree7db5f2d5ea45b53109b0f9f585fca845fa0773fe /tools/pmrfc5424.c
parentb41e0d51f54a89f489bbf1c1bf5f85d576ae4049 (diff)
downloadrsyslog-1b7f5c54684db29c096e09238648a45dce78ebee.tar.gz
rsyslog-1b7f5c54684db29c096e09238648a45dce78ebee.tar.xz
rsyslog-1b7f5c54684db29c096e09238648a45dce78ebee.zip
moved rfc3164/5424 code to new parser modules
another milestone commit: the program works, the new interface is used, some more cleanup is needed and the per-ruleset config options are still missing. But we are getting closer...
Diffstat (limited to 'tools/pmrfc5424.c')
-rw-r--r--tools/pmrfc5424.c228
1 files changed, 228 insertions, 0 deletions
diff --git a/tools/pmrfc5424.c b/tools/pmrfc5424.c
index acc21817..2fa2c981 100644
--- a/tools/pmrfc5424.c
+++ b/tools/pmrfc5424.c
@@ -27,6 +27,7 @@
*/
#include "config.h"
#include "rsyslog.h"
+#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <errno.h>
@@ -39,6 +40,7 @@
#include "glbl.h"
#include "errmsg.h"
#include "parser.h"
+#include "datetime.h"
#include "unicode-helper.h"
MODULE_TYPE_PARSER
@@ -50,6 +52,7 @@ DEF_PMOD_STATIC_DATA
DEFobjCurrIf(errmsg)
DEFobjCurrIf(glbl)
DEFobjCurrIf(parser)
+DEFobjCurrIf(datetime)
/* config data */
@@ -60,8 +63,231 @@ CODESTARTisCompatibleWithFeature
ENDisCompatibleWithFeature
+/* Helper to parseRFCSyslogMsg. This function parses a field up to
+ * (and including) the SP character after it. The field contents is
+ * returned in a caller-provided buffer. The parsepointer is advanced
+ * to after the terminating SP. The caller must ensure that the
+ * provided buffer is large enough to hold the to be extracted value.
+ * Returns 0 if everything is fine or 1 if either the field is not
+ * SP-terminated or any other error occurs. -- rger, 2005-11-24
+ * The function now receives the size of the string and makes sure
+ * that it does not process more than that. The *pLenStr counter is
+ * updated on exit. -- rgerhards, 2009-09-23
+ */
+static int parseRFCField(uchar **pp2parse, uchar *pResult, int *pLenStr)
+{
+ uchar *p2parse;
+ int iRet = 0;
+
+ assert(pp2parse != NULL);
+ assert(*pp2parse != NULL);
+ assert(pResult != NULL);
+
+ p2parse = *pp2parse;
+
+ /* this is the actual parsing loop */
+ while(*pLenStr > 0 && *p2parse != ' ') {
+ *pResult++ = *p2parse++;
+ --(*pLenStr);
+ }
+
+ if(*pLenStr > 0 && *p2parse == ' ') {
+ ++p2parse; /* eat SP, but only if not at end of string */
+ --(*pLenStr);
+ } else {
+ iRet = 1; /* there MUST be an SP! */
+ }
+ *pResult = '\0';
+
+ /* set the new parse pointer */
+ *pp2parse = p2parse;
+ return 0;
+}
+
+
+/* Helper to parseRFCSyslogMsg. This function parses the structured
+ * data field of a message. It does NOT parse inside structured data,
+ * just gets the field as whole. Parsing the single entities is left
+ * to other functions. The parsepointer is advanced
+ * to after the terminating SP. The caller must ensure that the
+ * provided buffer is large enough to hold the to be extracted value.
+ * Returns 0 if everything is fine or 1 if either the field is not
+ * SP-terminated or any other error occurs. -- rger, 2005-11-24
+ * The function now receives the size of the string and makes sure
+ * that it does not process more than that. The *pLenStr counter is
+ * updated on exit. -- rgerhards, 2009-09-23
+ */
+static int parseRFCStructuredData(uchar **pp2parse, uchar *pResult, int *pLenStr)
+{
+ uchar *p2parse;
+ int bCont = 1;
+ int iRet = 0;
+ int lenStr;
+
+ assert(pp2parse != NULL);
+ assert(*pp2parse != NULL);
+ assert(pResult != NULL);
+
+ p2parse = *pp2parse;
+ lenStr = *pLenStr;
+
+ /* this is the actual parsing loop
+ * Remeber: structured data starts with [ and includes any characters
+ * until the first ] followed by a SP. There may be spaces inside
+ * structured data. There may also be \] inside the structured data, which
+ * do NOT terminate an element.
+ */
+ if(lenStr == 0 || *p2parse != '[')
+ return 1; /* this is NOT structured data! */
+
+ if(*p2parse == '-') { /* empty structured data? */
+ *pResult++ = '-';
+ ++p2parse;
+ --lenStr;
+ } else {
+ while(bCont) {
+ if(lenStr < 2) {
+ /* we now need to check if we have only structured data */
+ if(lenStr > 0 && *p2parse == ']') {
+ *pResult++ = *p2parse;
+ p2parse++;
+ lenStr--;
+ bCont = 0;
+ } else {
+ iRet = 1; /* this is not valid! */
+ bCont = 0;
+ }
+ } else if(*p2parse == '\\' && *(p2parse+1) == ']') {
+ /* this is escaped, need to copy both */
+ *pResult++ = *p2parse++;
+ *pResult++ = *p2parse++;
+ lenStr -= 2;
+ } else if(*p2parse == ']' && *(p2parse+1) == ' ') {
+ /* found end, just need to copy the ] and eat the SP */
+ *pResult++ = *p2parse;
+ p2parse += 2;
+ lenStr -= 2;
+ bCont = 0;
+ } else {
+ *pResult++ = *p2parse++;
+ --lenStr;
+ }
+ }
+ }
+
+ if(lenStr > 0 && *p2parse == ' ') {
+ ++p2parse; /* eat SP, but only if not at end of string */
+ --lenStr;
+ } else {
+ iRet = 1; /* there MUST be an SP! */
+ }
+ *pResult = '\0';
+
+ /* set the new parse pointer */
+ *pp2parse = p2parse;
+ *pLenStr = lenStr;
+ return 0;
+}
+
+/* parse a RFC5424-formatted syslog message. This function returns
+ * 0 if processing of the message shall continue and 1 if something
+ * went wrong and this messe should be ignored. This function has been
+ * implemented in the effort to support syslog-protocol. Please note that
+ * the name (parse *RFC*) stems from the hope that syslog-protocol will
+ * some time become an RFC. Do not confuse this with informational
+ * RFC 3164 (which is legacy syslog).
+ *
+ * currently supported format:
+ *
+ * <PRI>VERSION SP TIMESTAMP SP HOSTNAME SP APP-NAME SP PROCID SP MSGID SP [SD-ID]s SP MSG
+ *
+ * <PRI> is already stripped when this function is entered. VERSION already
+ * has been confirmed to be "1", but has NOT been stripped from the message.
+ *
+ * rger, 2005-11-24
+ */
+//static int parseRFCSyslogMsg(msg_t *pMsg, int flags)
BEGINparse
+ uchar *p2parse;
+ uchar *pBuf = NULL;
+ int lenMsg;
+ int bContParse = 1;
CODESTARTparse
+ assert(pMsg != NULL);
+ assert(pMsg->pszRawMsg != NULL);
+ p2parse = pMsg->pszRawMsg + pMsg->offAfterPRI; /* point to start of text, after PRI */
+ lenMsg = pMsg->iLenRawMsg - pMsg->offAfterPRI;
+
+ /* check if we are the right parser */
+ if(lenMsg < 2 || p2parse[0] != '1' || p2parse[1] != ' ') {
+ ABORT_FINALIZE(RS_RET_COULD_NOT_PARSE);
+ }
+ DBGPRINTF("Message has RFC5424/syslog-protocol format.\n");
+ setProtocolVersion(pMsg, 1);
+ p2parse += 2;
+ lenMsg -= 2;
+
+ /* Now get us some memory we can use as a work buffer while parsing.
+ * We simply allocated a buffer sufficiently large to hold all of the
+ * message, so we can not run into any troubles. I think this is
+ * wiser than to use individual buffers.
+ */
+ CHKmalloc(pBuf = MALLOC(sizeof(uchar) * (lenMsg + 1)));
+
+ /* IMPORTANT NOTE:
+ * Validation is not actually done below nor are any errors handled. I have
+ * NOT included this for the current proof of concept. However, it is strongly
+ * advisable to add it when this code actually goes into production.
+ * rgerhards, 2005-11-24
+ */
+
+ /* TIMESTAMP */
+ if(datetime.ParseTIMESTAMP3339(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) {
+ if(pMsg->msgFlags & IGNDATE) {
+ /* we need to ignore the msg data, so simply copy over reception date */
+ memcpy(&pMsg->tTIMESTAMP, &pMsg->tRcvdAt, sizeof(struct syslogTime));
+ }
+ } else {
+ DBGPRINTF("no TIMESTAMP detected!\n");
+ bContParse = 0;
+ }
+
+ /* HOSTNAME */
+ if(bContParse) {
+ parseRFCField(&p2parse, pBuf, &lenMsg);
+ MsgSetHOSTNAME(pMsg, pBuf, ustrlen(pBuf));
+ }
+
+ /* APP-NAME */
+ if(bContParse) {
+ parseRFCField(&p2parse, pBuf, &lenMsg);
+ MsgSetAPPNAME(pMsg, (char*)pBuf);
+ }
+
+ /* PROCID */
+ if(bContParse) {
+ parseRFCField(&p2parse, pBuf, &lenMsg);
+ MsgSetPROCID(pMsg, (char*)pBuf);
+ }
+
+ /* MSGID */
+ if(bContParse) {
+ parseRFCField(&p2parse, pBuf, &lenMsg);
+ MsgSetMSGID(pMsg, (char*)pBuf);
+ }
+
+ /* STRUCTURED-DATA */
+ if(bContParse) {
+ parseRFCStructuredData(&p2parse, pBuf, &lenMsg);
+ MsgSetStructuredData(pMsg, (char*)pBuf);
+ }
+
+ /* MSG */
+ MsgSetMSGoffs(pMsg, p2parse - pMsg->pszRawMsg);
+
+finalize_it:
+ if(pBuf != NULL)
+ free(pBuf);
ENDparse
@@ -71,6 +297,7 @@ CODESTARTmodExit
objRelease(errmsg, CORE_COMPONENT);
objRelease(glbl, CORE_COMPONENT);
objRelease(parser, CORE_COMPONENT);
+ objRelease(datetime, CORE_COMPONENT);
ENDmodExit
@@ -88,6 +315,7 @@ CODEmodInit_QueryRegCFSLineHdlr
CHKiRet(objUse(glbl, CORE_COMPONENT));
CHKiRet(objUse(errmsg, CORE_COMPONENT));
CHKiRet(objUse(parser, CORE_COMPONENT));
+ CHKiRet(objUse(datetime, CORE_COMPONENT));
dbgprintf("rfc5424 parser init called\n");
dbgprintf("GetParserName addr %p\n", GetParserName);