diff options
author | Rainer Gerhards <rgerhards@adiscon.com> | 2011-06-21 15:11:21 +0200 |
---|---|---|
committer | Rainer Gerhards <rgerhards@adiscon.com> | 2011-06-21 15:11:21 +0200 |
commit | 242c263dc2eaa5c148d481503dae0498ee626103 (patch) | |
tree | 3d64994a124fdc5b9346c4bfef4e4d8938d50603 | |
parent | d5f78989d8fd2a3c11fd1e6ed1cd4d688b1e6728 (diff) | |
parent | 86225089f2d0e82deb368e1688464e8ba84d24cf (diff) | |
download | rsyslog-242c263dc2eaa5c148d481503dae0498ee626103.tar.gz rsyslog-242c263dc2eaa5c148d481503dae0498ee626103.tar.xz rsyslog-242c263dc2eaa5c148d481503dae0498ee626103.zip |
Merge branch 'v5-stable' into v5-devel
Conflicts:
ChangeLog
configure.ac
doc/manual.html
-rw-r--r-- | ChangeLog | 26 | ||||
-rw-r--r-- | action.c | 20 | ||||
-rw-r--r-- | runtime/queue.c | 12 | ||||
-rw-r--r-- | runtime/rule.c | 1 | ||||
-rw-r--r-- | tests/Makefile.am | 14 | ||||
-rwxr-xr-x | tests/diag.sh | 6 | ||||
-rwxr-xr-x | tests/failover-async.sh | 2 | ||||
-rwxr-xr-x | tests/failover-double.sh | 12 | ||||
-rwxr-xr-x | tests/sndrcv_drvr.sh | 50 | ||||
-rwxr-xr-x | tests/sndrcv_drvr_noexit.sh | 49 | ||||
-rwxr-xr-x | tests/sndrcv_failover.sh | 21 | ||||
-rw-r--r-- | tests/testsuites/failover-double.conf | 9 | ||||
-rw-r--r-- | tests/testsuites/sndrcv_failover_rcvr.conf | 11 | ||||
-rw-r--r-- | tests/testsuites/sndrcv_failover_sender.conf | 13 |
14 files changed, 177 insertions, 69 deletions
@@ -12,6 +12,20 @@ Version 5.9.1 [V5-DEVEL] (rgerhards), 2011-06-?? - bugfix: timestamp was incorrectly calculated for timezones with minute offset closes: http://bugzilla.adiscon.com/show_bug.cgi?id=271 +- bugfix: problems in failover action handling + closes: http://bugzilla.adiscon.com/show_bug.cgi?id=270 + closes: http://bugzilla.adiscon.com/show_bug.cgi?id=254 +- bugfix: mutex was invalidly left unlocked during action processing + At least one case where this can occur is during thread shutdown, which + may be initiated by lower activity. In most cases, this is quite + unlikely to happen. However, if it does, data structures may be + corrupted which could lead to fatal failure and segfault. I detected + this via a testbench test, not a user report. But I assume that some + users may have had unreproducable aborts that were cause by this bug. +- bugfix: memory leak in imtcp & subsystems under some circumstances + This leak is tied to error conditions which lead to incorrect cleanup + of some data structures. [backport from v6] +- bugfix/improvement:$WorkDirectory now gracefully handles trailing slashes --------------------------------------------------------------------------- Version 5.9.0 [V5-DEVEL] (rgerhards), 2011-06-08 - imfile: added $InputFileMaxLinesAtOnce directive @@ -47,9 +61,17 @@ Version 5.9.0 [V5-DEVEL] (rgerhards), 2011-06-08 affected directive was: $ActionExecOnlyWhenPreviousIsSuspended on closes: http://bugzilla.adiscon.com/show_bug.cgi?id=236 --------------------------------------------------------------------------- -Version 5.8.2 [V5-stable] (rgerhards), 2011-06-?? +Version 5.8.2 [V5-stable] (rgerhards), 2011-06-21 - bugfix: problems in failover action handling - closes: http://bugzilla.adiscon.com/show_bug.cgi?id=270 (not yet confirmed!) + closes: http://bugzilla.adiscon.com/show_bug.cgi?id=270 + closes: http://bugzilla.adiscon.com/show_bug.cgi?id=254 +- bugfix: mutex was invalidly left unlocked during action processing + At least one case where this can occur is during thread shutdown, which + may be initiated by lower activity. In most cases, this is quite + unlikely to happen. However, if it does, data structures may be + corrupted which could lead to fatal failure and segfault. I detected + this via a testbench test, not a user report. But I assume that some + users may have had unreproducable aborts that were cause by this bug. - bugfix: memory leak in imtcp & subsystems under some circumstances This leak is tied to error conditions which lead to incorrect cleanup of some data structures. [backport from v6] @@ -984,11 +984,13 @@ dbgprintf("XXXXX: tryDoAction %p, pnElem %d, nElem %d\n", pAction, *pnElem, if(localRet == RS_RET_OK) { /* mark messages as committed */ while(iCommittedUpTo <= i) { + pBatch->pElem[iCommittedUpTo].bPrevWasSuspended = 0; /* we had success! */ pBatch->pElem[iCommittedUpTo++].state = BATCH_STATE_COMM; } } else if(localRet == RS_RET_PREVIOUS_COMMITTED) { /* mark messages as committed */ while(iCommittedUpTo < i) { + pBatch->pElem[iCommittedUpTo].bPrevWasSuspended = 0; /* we had success! */ pBatch->pElem[iCommittedUpTo++].state = BATCH_STATE_COMM; } pBatch->pElem[i].state = BATCH_STATE_SUB; @@ -1568,6 +1570,9 @@ doQueueEnqObjDirectBatch(action_t *pAction, batch_t *pBatch) } if(pBatch->pElem[i].bFilterOK) bNeedSubmit = 1; + DBGPRINTF("action %p[%d]: filterOK:%d state:%d execWhenPrev:%d prevWasSusp:%d\n", + pAction, i, pBatch->pElem[i].bFilterOK, pBatch->pElem[i].state, + pAction->bExecWhenPrevSusp, pBatch->pElem[i].bPrevWasSuspended); } if(bNeedSubmit) { iRet = qqueueEnqObjDirectBatch(pAction->pQueue, pBatch); @@ -1576,6 +1581,9 @@ doQueueEnqObjDirectBatch(action_t *pAction, batch_t *pBatch) } if(bModifiedFilter) { for(i = 0 ; i < batchNumMsgs(pBatch) ; ++i) { + DBGPRINTF("action %p: filterOK:%d state:%d execWhenPrev:%d prevWasSusp:%d\n", + pAction, pBatch->pElem[i].bFilterOK, pBatch->pElem[i].state, + pAction->bExecWhenPrevSusp, pBatch->pElem[i].bPrevWasSuspended); /* note: clang static code analyzer reports a false positive below */ pBatch->pElem[i].bFilterOK = pFilterSave[i]; } @@ -1599,21 +1607,17 @@ doSubmitToActionQBatch(action_t *pAction, batch_t *pBatch) int i; DEFiRet; - /* TODO - ich arbeite an dieser funktion, es müssen die verscheidenen modi geprüft werden. ausserdem - muss geschaut werden, in welche anderen funktionen die neue Funktionalität noch eingebaut - werden muss, bzw. ob man das an zentralerer stelle machen kann. Am besten die gesamte - filter evaluation nochmal druchgehen (also das füllen des arrays). - */ - DBGPRINTF("Called action(Batch), logging to %s\n", module.GetStateName(pAction->pMod)); if(pAction->pQueue->qType == QUEUETYPE_DIRECT) { iRet = doQueueEnqObjDirectBatch(pAction, pBatch); - } else { /* in this case, we do single submits to the queue. + } else {/* in this case, we do single submits to the queue. * TODO: optimize this, we may do at least a multi-submit! */ for(i = 0 ; i < batchNumMsgs(pBatch) && !*(pBatch->pbShutdownImmediate) ; ++i) { + DBGPRINTF("action %p: filterOK:%d state:%d execWhenPrev:%d prevWasSusp:%d\n", + pAction, pBatch->pElem[i].bFilterOK, pBatch->pElem[i].state, + pAction->bExecWhenPrevSusp, pBatch->pElem[i].bPrevWasSuspended); if( pBatch->pElem[i].bFilterOK && pBatch->pElem[i].state != BATCH_STATE_DISC && (pAction->bExecWhenPrevSusp == 0 || pBatch->pElem[i].bPrevWasSuspended == 1)) { diff --git a/runtime/queue.c b/runtime/queue.c index d31a4551..c831836d 100644 --- a/runtime/queue.c +++ b/runtime/queue.c @@ -1721,6 +1721,7 @@ static rsRetVal ConsumerReg(qqueue_t *pThis, wti_t *pWti) { int iCancelStateSave; + int bNeedReLock = 0; /**< do we need to lock the mutex again? */ DEFiRet; ISOBJ_TYPE_assert(pThis, qqueue); @@ -1741,6 +1742,7 @@ ConsumerReg(qqueue_t *pThis, wti_t *pWti) /* we now have a non-idle batch of work, so we can release the queue mutex and process it */ d_pthread_mutex_unlock(pThis->mut); + bNeedReLock = 1; /* at this spot, we may be cancelled */ pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &iCancelStateSave); @@ -1760,12 +1762,14 @@ ConsumerReg(qqueue_t *pThis, wti_t *pWti) /* but now cancellation is no longer permitted */ pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &iCancelStateSave); - /* now we are done, but need to re-aquire the mutex */ - d_pthread_mutex_lock(pThis->mut); - finalize_it: - dbgprintf("regular consumer finished, iret=%d, szlog %d sz phys %d\n", iRet, + DBGPRINTF("regular consumer finished, iret=%d, szlog %d sz phys %d\n", iRet, getLogicalQueueSize(pThis), getPhysicalQueueSize(pThis)); + + /* now we are done, but potentially need to re-aquire the mutex */ + if(bNeedReLock) + d_pthread_mutex_lock(pThis->mut); + RETiRet; } diff --git a/runtime/rule.c b/runtime/rule.c index 19239d61..0776e2dc 100644 --- a/runtime/rule.c +++ b/runtime/rule.c @@ -279,7 +279,6 @@ processBatch(rule_t *pThis, batch_t *pBatch) if(localRet != RS_RET_OK) { DBGPRINTF("processBatch: iRet %d returned from shouldProcessThisMessage, " "ignoring message\n", localRet); - pBatch->pElem[i].bFilterOK = 0; } if(pBatch->pElem[i].bFilterOK) { diff --git a/tests/Makefile.am b/tests/Makefile.am index 3ee69413..77381d2f 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -18,6 +18,7 @@ TESTS += \ imtcp_conndrop.sh \ imtcp_addtlframedelim.sh \ sndrcv.sh \ + sndrcv_failover.sh \ sndrcv_gzip.sh \ sndrcv_udp.sh \ sndrcv_udp_nonstdpt.sh \ @@ -52,6 +53,8 @@ TESTS += \ discard-rptdmsg.sh \ discard-allmark.sh \ discard.sh \ + failover-async.sh \ + failover-double.sh \ failover-basic.sh \ failover-rptd.sh \ failover-no-rptd.sh \ @@ -98,7 +101,8 @@ endif if ENABLE_GNUTLS if HAVE_VALGRIND -TESTS += manytcp-too-few-tls.sh +# This test does not work on v5 as we keep DH params +#TESTS += manytcp-too-few-tls.sh endif endif @@ -285,6 +289,10 @@ EXTRA_DIST= 1.rstest 2.rstest 3.rstest err1.rstest \ failover-basic.sh \ failover-basic-vg.sh \ testsuites/failover-basic.conf \ + failover-async.sh \ + testsuites/failover-async.conf \ + failover-double.sh \ + testsuites/failover-double.conf \ discard-rptdmsg.sh \ discard-rptdmsg-vg.sh \ testsuites/discard-rptdmsg.conf \ @@ -307,6 +315,10 @@ EXTRA_DIST= 1.rstest 2.rstest 3.rstest err1.rstest \ threadingmqaq.sh \ testsuites/threadingmqaq.conf \ sndrcv_drvr.sh \ + sndrcv_drvr_noexit.sh \ + sndrcv_failover.sh \ + testsuites/sndrcv_failover_sender.conf \ + testsuites/sndrcv_failover_rcvr.conf \ sndrcv.sh \ testsuites/sndrcv_sender.conf \ testsuites/sndrcv_rcvr.conf \ diff --git a/tests/diag.sh b/tests/diag.sh index d1242fb1..617118ba 100755 --- a/tests/diag.sh +++ b/tests/diag.sh @@ -10,7 +10,7 @@ #valgrind="valgrind --tool=helgrind --log-fd=1" #valgrind="valgrind --tool=exp-ptrcheck --log-fd=1" #set -o xtrace -#export RSYSLOG_DEBUG="debug nologfuncflow nostdout" +#export RSYSLOG_DEBUG="debug nologfuncflow noprintmutexaction stdout" #export RSYSLOG_DEBUGLOG="log" case $1 in 'init') $srcdir/killrsyslog.sh # kill rsyslogd if it runs for some reason @@ -22,7 +22,7 @@ case $1 in rm -f work rsyslog.out.log rsyslog2.out.log rsyslog.out.log.save # common work files rm -rf test-spool test-logdir rm -f rsyslog.out.*.log work-presort rsyslog.pipe - rm -f rsyslog.input + rm -f rsyslog.input rsyslog.empty rm -f core.* vgcore.* mkdir test-spool ;; @@ -31,7 +31,7 @@ case $1 in rm -f work rsyslog.out.log rsyslog2.out.log rsyslog.out.log.save # common work files rm -rf test-spool test-logdir rm -f rsyslog.out.*.log rsyslog.random.data work-presort rsyslog.pipe - rm -f rsyslog.input stat-file1 + rm -f rsyslog.input stat-file1 #rsyslog.empty echo ------------------------------------------------------------------------------- ;; 'startup') # start rsyslogd with default params. $2 is the config file name to use diff --git a/tests/failover-async.sh b/tests/failover-async.sh index f17bc0ff..5fc393de 100755 --- a/tests/failover-async.sh +++ b/tests/failover-async.sh @@ -3,7 +3,7 @@ echo =========================================================================== echo \[failover-async.sh\]: async test for failover functionality source $srcdir/diag.sh init source $srcdir/diag.sh startup failover-async.conf -source $srcdir/diag.sh injectmsg 0 5 +source $srcdir/diag.sh injectmsg 0 5000 echo doing shutdown source $srcdir/diag.sh shutdown-when-empty echo wait on shutdown diff --git a/tests/failover-double.sh b/tests/failover-double.sh new file mode 100755 index 00000000..172b91de --- /dev/null +++ b/tests/failover-double.sh @@ -0,0 +1,12 @@ +# This file is part of the rsyslog project, released under GPLv3 +echo =============================================================================== +echo \[failover-double.sh\]: test for double failover functionality +source $srcdir/diag.sh init +source $srcdir/diag.sh startup failover-double.conf +source $srcdir/diag.sh injectmsg 0 5000 +echo doing shutdown +source $srcdir/diag.sh shutdown-when-empty +echo wait on shutdown +source $srcdir/diag.sh wait-shutdown +source $srcdir/diag.sh seq-check 0 4999 +source $srcdir/diag.sh exit diff --git a/tests/sndrcv_drvr.sh b/tests/sndrcv_drvr.sh index 3d613069..f9092647 100755 --- a/tests/sndrcv_drvr.sh +++ b/tests/sndrcv_drvr.sh @@ -1,50 +1,2 @@ -# This is test driver for testing two rsyslog instances. It can be -# utilized by any test that just needs two instances with different -# config files, where messages are injected in instance TWO and -# (with whatever rsyslog mechanism) being relayed over to instance ONE, -# where they are written to the log file. After the run, the completeness -# of that log file is checked. -# The code is almost the same, but the config files differ (probably greatly) -# for different test cases. As such, this driver needs to be called with the -# config file name ($2). From that name, the sender and receiver config file -# names are automatically generated. -# So: $1 config file name, $2 number of messages -# -# A note on TLS testing: the current testsuite (in git!) already contains -# TLS test cases. However, getting these test cases correct is not simple. -# That's not a problem with the code itself, but rater a problem with -# synchronization in the test environment. So I have deciced to keep the -# TLS tests in, but not yet actually utilize them. This is most probably -# left as an excercise for future (devel) releases. -- rgerhards, 2009-11-11 -# -# added 2009-11-11 by Rgerhards -# This file is part of the rsyslog project, released under GPLv3 -# uncomment for debugging support: -source $srcdir/diag.sh init -# start up the instances -#export RSYSLOG_DEBUG="debug nostdout noprintmutexaction" -#export RSYSLOG_DEBUGLOG="log" -source $srcdir/diag.sh startup $1_rcvr.conf -source $srcdir/diag.sh wait-startup -#export RSYSLOG_DEBUGLOG="log2" -#valgrind="valgrind" -source $srcdir/diag.sh startup $1_sender.conf 2 -source $srcdir/diag.sh wait-startup 2 -# may be needed by TLS (once we do it): sleep 30 - -# now inject the messages into instance 2. It will connect to instance 1, -# and that instance will record the data. -source $srcdir/diag.sh tcpflood -m$2 -i1 -sleep 2 # make sure all data is received in input buffers -# shut down sender when everything is sent, receiver continues to run concurrently -# may be needed by TLS (once we do it): sleep 60 -source $srcdir/diag.sh shutdown-when-empty 2 -source $srcdir/diag.sh wait-shutdown 2 -# now it is time to stop the receiver as well -source $srcdir/diag.sh shutdown-when-empty -source $srcdir/diag.sh wait-shutdown - -# may be needed by TLS (once we do it): sleep 60 -# do the final check -source $srcdir/diag.sh seq-check 1 $2 +source $srcdir/sndrcv_drvr_noexit.sh $1 $2 source $srcdir/diag.sh exit diff --git a/tests/sndrcv_drvr_noexit.sh b/tests/sndrcv_drvr_noexit.sh new file mode 100755 index 00000000..899eace3 --- /dev/null +++ b/tests/sndrcv_drvr_noexit.sh @@ -0,0 +1,49 @@ +# This is test driver for testing two rsyslog instances. It can be +# utilized by any test that just needs two instances with different +# config files, where messages are injected in instance TWO and +# (with whatever rsyslog mechanism) being relayed over to instance ONE, +# where they are written to the log file. After the run, the completeness +# of that log file is checked. +# The code is almost the same, but the config files differ (probably greatly) +# for different test cases. As such, this driver needs to be called with the +# config file name ($2). From that name, the sender and receiver config file +# names are automatically generated. +# So: $1 config file name, $2 number of messages +# +# A note on TLS testing: the current testsuite (in git!) already contains +# TLS test cases. However, getting these test cases correct is not simple. +# That's not a problem with the code itself, but rater a problem with +# synchronization in the test environment. So I have deciced to keep the +# TLS tests in, but not yet actually utilize them. This is most probably +# left as an excercise for future (devel) releases. -- rgerhards, 2009-11-11 +# +# added 2009-11-11 by Rgerhards +# This file is part of the rsyslog project, released under GPLv3 +# uncomment for debugging support: +source $srcdir/diag.sh init +# start up the instances +#export RSYSLOG_DEBUG="debug nostdout noprintmutexaction" +#export RSYSLOG_DEBUGLOG="log" +source $srcdir/diag.sh startup $1_rcvr.conf +source $srcdir/diag.sh wait-startup +#export RSYSLOG_DEBUGLOG="log2" +#valgrind="valgrind" +source $srcdir/diag.sh startup $1_sender.conf 2 +source $srcdir/diag.sh wait-startup 2 +# may be needed by TLS (once we do it): sleep 30 + +# now inject the messages into instance 2. It will connect to instance 1, +# and that instance will record the data. +source $srcdir/diag.sh tcpflood -m$2 -i1 +sleep 2 # make sure all data is received in input buffers +# shut down sender when everything is sent, receiver continues to run concurrently +# may be needed by TLS (once we do it): sleep 60 +source $srcdir/diag.sh shutdown-when-empty 2 +source $srcdir/diag.sh wait-shutdown 2 +# now it is time to stop the receiver as well +source $srcdir/diag.sh shutdown-when-empty +source $srcdir/diag.sh wait-shutdown + +# may be needed by TLS (once we do it): sleep 60 +# do the final check +source $srcdir/diag.sh seq-check 1 $2 diff --git a/tests/sndrcv_failover.sh b/tests/sndrcv_failover.sh new file mode 100755 index 00000000..4c5e1831 --- /dev/null +++ b/tests/sndrcv_failover.sh @@ -0,0 +1,21 @@ +# This tests failover capabilities. Data is sent to local port 13516, where +# no process shall listen. Then it fails over to a second instance, then to +# a file. The second instance is started. So all data should be received +# there and none be logged to the file. +# This builds on the basic sndrcv.sh test, but adds a first, failing, +# location to the conf file. +# added 2011-06-20 by Rgerhards +# This file is part of the rsyslog project, released under GPLv3 +echo =============================================================================== +echo \[sndrcv_failover.sh\]: testing failover capabilities for tcp sending +source $srcdir/sndrcv_drvr_noexit.sh sndrcv_failover 50000 +ls -l rsyslog.empty +if [[ -s rsyslog.empty ]] ; then + echo "FAIL: rsyslog.empty has data. Failover handling failed. Data is written" + echo " even though the previous action (in a failover chain!) properly" + echo " worked." + exit 1 +else + echo "rsyslog.empty is empty - OK" +fi ; +source $srcdir/diag.sh exit diff --git a/tests/testsuites/failover-double.conf b/tests/testsuites/failover-double.conf new file mode 100644 index 00000000..a9991321 --- /dev/null +++ b/tests/testsuites/failover-double.conf @@ -0,0 +1,9 @@ +$IncludeConfig diag-common.conf + +$template outfmt,"%msg:F,58:2%\n" + +:msg, contains, "msgnum:" @@127.0.0.1:13516 +$ActionExecOnlyWhenPreviousIsSuspended on +& @@127.0.0.1:1234 +& ./rsyslog.out.log;outfmt +$ActionExecOnlyWhenPreviousIsSuspended off diff --git a/tests/testsuites/sndrcv_failover_rcvr.conf b/tests/testsuites/sndrcv_failover_rcvr.conf new file mode 100644 index 00000000..6f7ce34b --- /dev/null +++ b/tests/testsuites/sndrcv_failover_rcvr.conf @@ -0,0 +1,11 @@ +# see equally-named shell file for details +# rgerhards, 2009-11-11 +$IncludeConfig diag-common.conf + +$ModLoad ../plugins/imtcp/.libs/imtcp +# then SENDER sends to this port (not tcpflood!) +$InputTCPServerRun 13515 + +$template outfmt,"%msg:F,58:2%\n" +$template dynfile,"rsyslog.out.log" # trick to use relative path names! +:msg, contains, "msgnum:" ?dynfile;outfmt diff --git a/tests/testsuites/sndrcv_failover_sender.conf b/tests/testsuites/sndrcv_failover_sender.conf new file mode 100644 index 00000000..b8e7c186 --- /dev/null +++ b/tests/testsuites/sndrcv_failover_sender.conf @@ -0,0 +1,13 @@ +# see tcpsndrcv.sh for details +# rgerhards, 2009-11-11 +$IncludeConfig diag-common2.conf + +$ModLoad ../plugins/imtcp/.libs/imtcp +# this listener is for message generation by the test framework! +$InputTCPServerRun 13514 + +*.* @@127.0.0.1:13516 # this must be DEAD +$ActionExecOnlyWhenPreviousIsSuspended on +& @@127.0.0.1:13515 +& ./rsyslog.empty +$ActionExecOnlyWhenPreviousIsSuspended off |