From 55e01da2ec3de1b5c6b15e4154235f0eedbb68da Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Mon, 9 Jun 2008 12:40:54 +0200 Subject: somewhat improved plain tcp syslog reliability ...by doing a connection check before sending. Credits to Martin Schuette for providing the idea. Details are available at http://blog.gerhards.net/2008/06/reliable-plain-tcp-syslog-once-again.html --- ChangeLog | 4 ++++ doc/rsyslog_conf.html | 1 + runtime/netstrm.c | 10 ++++++++++ runtime/netstrm.h | 1 + runtime/nsd.h | 3 ++- runtime/nsd_gtls.c | 12 ++++++++++++ runtime/nsd_ptcp.c | 29 +++++++++++++++++++++++++++++ tcpclt.c | 36 ++++++++++++++++++++++++++---------- tcpclt.h | 4 +++- tools/omfwd.c | 7 ++++++- 10 files changed, 94 insertions(+), 13 deletions(-) diff --git a/ChangeLog b/ChangeLog index 190c659d..2cc995bd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -2,6 +2,10 @@ Version 3.19.7 (rgerhards), 2008-06-?? - added new property replacer option "date-subseconds" that enables to query just the subsecond part of a high-precision timestamp +- somewhat improved plain tcp syslog reliability by doing a connection + check before sending. Credits to Martin Schuette for providing the + idea. Details are available at + http://blog.gerhards.net/2008/06/reliable-plain-tcp-syslog-once-again.html --------------------------------------------------------------------------- Version 3.19.6 (rgerhards), 2008-06-06 - enhanced property replacer to support multiple regex matches diff --git a/doc/rsyslog_conf.html b/doc/rsyslog_conf.html index efb3ad0c..3946a2d3 100644 --- a/doc/rsyslog_conf.html +++ b/doc/rsyslog_conf.html @@ -120,6 +120,7 @@ default 60000 (1 minute)]
  • $ActionQueueWorkerThreadMinumumMessages <number>, default 100
  • $ActionResumeInterval
  • $ActionResumeRetryCount <number> [default 0, -1 means eternal]
  • +
  • $ActionSendResendLastMsgOnReconn <[on/off]> specifies if the last message is to be resend when a connecition broken and has been reconnedcted. May increase reliability, but comes at the risk of message duplication.
  • $ActionSendStreamDriver <driver basename> just like $DefaultNetstreamDriver, but for the specific action
  • $ActionSendStreamDriverMode <mode>, default 0, mode to use with the stream driver (driver-specific)
  • $ActionSendStreamDriverAuthMode <mode>,  authentication mode to use with the stream driver diff --git a/runtime/netstrm.c b/runtime/netstrm.c index 786ba7f8..2f4a1964 100644 --- a/runtime/netstrm.c +++ b/runtime/netstrm.c @@ -234,6 +234,15 @@ Send(netstrm_t *pThis, uchar *pBuf, ssize_t *pLenBuf) } +/* check connection - slim wrapper for NSD driver function */ +static void +CheckConnection(netstrm_t *pThis) +{ + ISOBJ_TYPE_assert(pThis, netstrm); + pThis->Drvr.CheckConnection(pThis->pDrvrData); +} + + /* get remote hname - slim wrapper for NSD driver function */ static rsRetVal GetRemoteHName(netstrm_t *pThis, uchar **ppsz) @@ -314,6 +323,7 @@ CODESTARTobjQueryInterface(netstrm) pIf->SetDrvrMode = SetDrvrMode; pIf->SetDrvrAuthMode = SetDrvrAuthMode; pIf->SetDrvrPermPeers = SetDrvrPermPeers; + pIf->CheckConnection = CheckConnection; pIf->GetSock = GetSock; finalize_it: ENDobjQueryInterface(netstrm) diff --git a/runtime/netstrm.h b/runtime/netstrm.h index ae135beb..1a97ef23 100644 --- a/runtime/netstrm.h +++ b/runtime/netstrm.h @@ -52,6 +52,7 @@ BEGINinterface(netstrm) /* name must also be changed in ENDinterface macro! */ rsRetVal (*SetDrvrMode)(netstrm_t *pThis, int iMode); rsRetVal (*SetDrvrAuthMode)(netstrm_t *pThis, uchar*); rsRetVal (*SetDrvrPermPeers)(netstrm_t *pThis, permittedPeers_t*); + void (*CheckConnection)(netstrm_t *pThis); /* This is a trick mostly for plain tcp syslog */ /* the GetSock() below is a hack to make imgssapi work. In the long term, * we should migrate imgssapi to a stream driver, which will relieve us of * this problem. Please note that nobody else should use GetSock(). Using it diff --git a/runtime/nsd.h b/runtime/nsd.h index 53693b59..1811f078 100644 --- a/runtime/nsd.h +++ b/runtime/nsd.h @@ -53,6 +53,7 @@ BEGINinterface(nsd) /* name must also be changed in ENDinterface macro! */ rsRetVal (*SetMode)(nsd_t *pThis, int mode); /* sets a driver specific mode - see driver doc for details */ rsRetVal (*SetAuthMode)(nsd_t *pThis, uchar*); /* sets a driver specific mode - see driver doc for details */ rsRetVal (*SetPermPeers)(nsd_t *pThis, permittedPeers_t*); /* sets driver permitted peers for auth needs */ + void (*CheckConnection)(nsd_t *pThis); /* This is a trick mostly for plain tcp syslog */ rsRetVal (*GetSock)(nsd_t *pThis, int *pSock); rsRetVal (*SetSock)(nsd_t *pThis, int sock); /* GetSock() and SetSock() return an error if the driver does not use plain @@ -60,7 +61,7 @@ BEGINinterface(nsd) /* name must also be changed in ENDinterface macro! */ * those drivers that utilize the nsd_ptcp to do some of their work. */ ENDinterface(nsd) -#define nsdCURR_IF_VERSION 2 /* increment whenever you change the interface structure! */ +#define nsdCURR_IF_VERSION 3 /* increment whenever you change the interface structure! */ /* interface for the select call */ BEGINinterface(nsdsel) /* name must also be changed in ENDinterface macro! */ diff --git a/runtime/nsd_gtls.c b/runtime/nsd_gtls.c index 0440f149..567701dc 100644 --- a/runtime/nsd_gtls.c +++ b/runtime/nsd_gtls.c @@ -1243,6 +1243,17 @@ finalize_it: } +/* This function checks if the connection is still alive - well, kind of... + * This is a dummy here. For details, check function common in ptcp driver. + * rgerhards, 2008-06-09 + */ +static void +CheckConnection(nsd_t __attribute__((unused)) *pNsd) +{ + /* dummy, do nothing */ +} + + /* get the remote hostname. The returned hostname must be freed by the caller. * rgerhards, 2008-04-25 */ @@ -1507,6 +1518,7 @@ CODESTARTobjQueryInterface(nsd_gtls) pIf->SetMode = SetMode; pIf->SetAuthMode = SetAuthMode; pIf->SetPermPeers =SetPermPeers; + pIf->CheckConnection = CheckConnection; pIf->GetRemoteHName = GetRemoteHName; pIf->GetRemoteIP = GetRemoteIP; finalize_it: diff --git a/runtime/nsd_ptcp.c b/runtime/nsd_ptcp.c index 14c564a3..ff85619a 100644 --- a/runtime/nsd_ptcp.c +++ b/runtime/nsd_ptcp.c @@ -638,6 +638,34 @@ finalize_it: } +/* This function checks if the connection is still alive - well, kind of... It + * is primarily being used for plain TCP syslog and it is quite a hack. However, + * as it seems to work, it is worth supporting it. The bottom line is that it + * should not be called by anything else but a plain tcp syslog sender. + * In order for it to work, it must be called *immediately* *before* the send() + * call. For details about what is done, see here: + * http://blog.gerhards.net/2008/06/getting-bit-more-reliability-from-plain.html + * rgerhards, 2008-06-09 + */ +static void +CheckConnection(nsd_t *pNsd) +{ + int rc; + char msgbuf[1]; /* dummy */ + nsd_ptcp_t *pThis = (nsd_ptcp_t*) pNsd; + ISOBJ_TYPE_assert(pThis, nsd_ptcp); + + rc = recv(pThis->sock, msgbuf, 1, MSG_DONTWAIT | MSG_PEEK); + if(rc == 0) { + dbgprintf("CheckConnection detected broken connection - closing it\n"); + /* in this case, the remote peer had shut down the connection and we + * need to close our side, too. + */ + sockClose(&pThis->sock); + } +} + + /* get the remote host's IP address. The returned string must be freed by the * caller. * rgerhards, 2008-04-24 @@ -684,6 +712,7 @@ CODESTARTobjQueryInterface(nsd_ptcp) pIf->Connect = Connect; pIf->GetRemoteHName = GetRemoteHName; pIf->GetRemoteIP = GetRemoteIP; + pIf->CheckConnection = CheckConnection; finalize_it: ENDobjQueryInterface(nsd_ptcp) diff --git a/tcpclt.c b/tcpclt.c index 2bc2ea56..c53f00f7 100644 --- a/tcpclt.c +++ b/tcpclt.c @@ -305,16 +305,23 @@ Send(tcpclt_t *pThis, void *pData, char *msg, size_t len) /* we are done, we also use this as indication that the previous * message was succesfully received (it's not always the case, but its at * least our best shot at it -- rgerhards, 2008-03-12 + * As of 2008-06-09, we have implemented an algorithm which detects connection + * loss quite good in some (common) scenarios. Thus, the probability of + * message duplication due to the code below has increased. We so now have + * a config setting, default off, that enables the user to request retransmits. + * However, if not requested, we do NOT need to do all the stuff needed for it. */ - if(pThis->prevMsg != NULL) - free(pThis->prevMsg); - /* if we can not alloc a new buffer, we silently ignore it. The worst that - * happens is that we lose our message recovery buffer - anything else would - * be worse, so don't try anything ;) -- rgerhards, 2008-03-12 - */ - if((pThis->prevMsg = malloc(len)) != NULL) { - memcpy(pThis->prevMsg, msg, len); - pThis->lenPrevMsg = len; + if(pThis->bResendLastOnRecon == 1) { + if(pThis->prevMsg != NULL) + free(pThis->prevMsg); + /* if we can not alloc a new buffer, we silently ignore it. The worst that + * happens is that we lose our message recovery buffer - anything else would + * be worse, so don't try anything ;) -- rgerhards, 2008-03-12 + */ + if((pThis->prevMsg = malloc(len)) != NULL) { + memcpy(pThis->prevMsg, msg, len); + pThis->lenPrevMsg = len; + } } /* we are done with this record */ @@ -324,7 +331,8 @@ Send(tcpclt_t *pThis, void *pData, char *msg, size_t len) ++retry; CHKiRet(pThis->prepRetryFunc(pData)); /* try to recover */ /* now try to send our stored previous message (which most probably - * didn't make it + * didn't make it. Note that if bResendLastOnRecon is 0, prevMsg will + * never become non-NULL, so the check below covers all cases. */ if(pThis->prevMsg != NULL) { CHKiRet(pThis->initFunc(pData)); @@ -346,6 +354,13 @@ finalize_it: /* set functions */ static rsRetVal +SetResendLastOnRecon(tcpclt_t *pThis, int bResendLastOnRecon) +{ + DEFiRet; + pThis->bResendLastOnRecon = (short) bResendLastOnRecon; + RETiRet; +} +static rsRetVal SetSendInit(tcpclt_t *pThis, rsRetVal (*pCB)(void*)) { DEFiRet; @@ -425,6 +440,7 @@ CODESTARTobjQueryInterface(tcpclt) pIf->Send = Send; /* set functions */ + pIf->SetResendLastOnRecon = SetResendLastOnRecon; pIf->SetSendInit = SetSendInit; pIf->SetSendFrame = SetSendFrame; pIf->SetSendPrepRetry = SetSendPrepRetry; diff --git a/tcpclt.h b/tcpclt.h index b7aada65..1d704044 100644 --- a/tcpclt.h +++ b/tcpclt.h @@ -33,6 +33,7 @@ typedef struct tcpclt_s { BEGINobjInstance; /**< Data to implement generic object - MUST be the first data element! */ TCPFRAMINGMODE tcp_framing; char *prevMsg; + short bResendLastOnRecon; /* should the last message be resent on a successful reconnect? */ size_t lenPrevMsg; /* session specific callbacks */ rsRetVal (*initFunc)(void*); @@ -49,12 +50,13 @@ BEGINinterface(tcpclt) /* name must also be changed in ENDinterface macro! */ int (*Send)(tcpclt_t *pThis, void*pData, char*msg, size_t len); int (*CreateSocket)(struct addrinfo *addrDest); /* set methods */ + rsRetVal (*SetResendLastOnRecon)(tcpclt_t*, int); rsRetVal (*SetSendInit)(tcpclt_t*, rsRetVal (*)(void*)); rsRetVal (*SetSendFrame)(tcpclt_t*, rsRetVal (*)(void*, char*, size_t)); rsRetVal (*SetSendPrepRetry)(tcpclt_t*, rsRetVal (*)(void*)); rsRetVal (*SetFraming)(tcpclt_t*, TCPFRAMINGMODE framing); ENDinterface(tcpclt) -#define tcpcltCURR_IF_VERSION 1 /* increment whenever you change the interface structure! */ +#define tcpcltCURR_IF_VERSION 2 /* increment whenever you change the interface structure! */ /* prototypes */ diff --git a/tools/omfwd.c b/tools/omfwd.c index 317bc298..1783ef7d 100644 --- a/tools/omfwd.c +++ b/tools/omfwd.c @@ -97,7 +97,8 @@ typedef struct _instanceData { /* config data */ static uchar *pszTplName = NULL; /* name of the default template to use */ static uchar *pszStrmDrvr = NULL; /* name of the stream driver to use */ -static int iStrmDrvrMode = 0; /* mode for stream driver, driver-dependent (0 mostly means plain tcp) */ +static short iStrmDrvrMode = 0; /* mode for stream driver, driver-dependent (0 mostly means plain tcp) */ +static short bResendLastOnRecon = 0; /* should the last message be re-sent on a successful reconnect? */ static uchar *pszStrmDrvrAuthMode = NULL; /* authentication mode to use */ static permittedPeers_t *pPermPeers = NULL; @@ -254,6 +255,7 @@ static rsRetVal TCPSendFrame(void *pvData, char *msg, size_t len) instanceData *pData = (instanceData *) pvData; lenSend = len; + netstrm.CheckConnection(pData->pNetstrm); /* hack for plain tcp syslog - see ptcp driver for details */ CHKiRet(netstrm.Send(pData->pNetstrm, (uchar*)msg, &lenSend)); dbgprintf("TCP sent %ld bytes, requested %ld\n", (long) lenSend, (long) len); @@ -605,6 +607,7 @@ CODE_STD_STRING_REQUESTparseSelectorAct(1) if(pData->protocol == FORW_TCP) { /* create our tcpclt */ CHKiRet(tcpclt.Construct(&pData->pTCPClt)); + CHKiRet(tcpclt.SetResendLastOnRecon(pData->pTCPClt, bResendLastOnRecon)); /* and set callbacks */ CHKiRet(tcpclt.SetSendInit(pData->pTCPClt, TCPSendInit)); CHKiRet(tcpclt.SetSendFrame(pData->pTCPClt, TCPSendFrame)); @@ -679,6 +682,7 @@ static rsRetVal resetConfigVariables(uchar __attribute__((unused)) *pp, void __a /* we now must reset all non-string values */ iStrmDrvrMode = 0; + bResendLastOnRecon = 0; return RS_RET_OK; } @@ -697,6 +701,7 @@ CODEmodInit_QueryRegCFSLineHdlr CHKiRet(regCfSysLineHdlr((uchar *)"actionsendstreamdrivermode", 0, eCmdHdlrInt, NULL, &iStrmDrvrMode, NULL)); CHKiRet(regCfSysLineHdlr((uchar *)"actionsendstreamdriverauthmode", 0, eCmdHdlrGetWord, NULL, &pszStrmDrvrAuthMode, NULL)); CHKiRet(regCfSysLineHdlr((uchar *)"actionsendstreamdriverpermittedpeer", 0, eCmdHdlrGetWord, setPermittedPeer, NULL, NULL)); + CHKiRet(regCfSysLineHdlr((uchar *)"actionsendresendlastmsgonreconnect", 0, eCmdHdlrBinary, NULL, &bResendLastOnRecon, NULL)); CHKiRet(omsdRegCFSLineHdlr((uchar *)"resetconfigvariables", 1, eCmdHdlrCustomHandler, resetConfigVariables, NULL, STD_LOADABLE_MODULE_ID)); ENDmodInit -- cgit