summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRainer Gerhards <rgerhards@adiscon.com>2011-06-21 15:11:21 +0200
committerRainer Gerhards <rgerhards@adiscon.com>2011-06-21 15:11:21 +0200
commit242c263dc2eaa5c148d481503dae0498ee626103 (patch)
tree3d64994a124fdc5b9346c4bfef4e4d8938d50603
parentd5f78989d8fd2a3c11fd1e6ed1cd4d688b1e6728 (diff)
parent86225089f2d0e82deb368e1688464e8ba84d24cf (diff)
downloadrsyslog-242c263dc2eaa5c148d481503dae0498ee626103.tar.gz
rsyslog-242c263dc2eaa5c148d481503dae0498ee626103.tar.xz
rsyslog-242c263dc2eaa5c148d481503dae0498ee626103.zip
Merge branch 'v5-stable' into v5-devel
Conflicts: ChangeLog configure.ac doc/manual.html
-rw-r--r--ChangeLog26
-rw-r--r--action.c20
-rw-r--r--runtime/queue.c12
-rw-r--r--runtime/rule.c1
-rw-r--r--tests/Makefile.am14
-rwxr-xr-xtests/diag.sh6
-rwxr-xr-xtests/failover-async.sh2
-rwxr-xr-xtests/failover-double.sh12
-rwxr-xr-xtests/sndrcv_drvr.sh50
-rwxr-xr-xtests/sndrcv_drvr_noexit.sh49
-rwxr-xr-xtests/sndrcv_failover.sh21
-rw-r--r--tests/testsuites/failover-double.conf9
-rw-r--r--tests/testsuites/sndrcv_failover_rcvr.conf11
-rw-r--r--tests/testsuites/sndrcv_failover_sender.conf13
14 files changed, 177 insertions, 69 deletions
diff --git a/ChangeLog b/ChangeLog
index c13cc142..4c315819 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -12,6 +12,20 @@ Version 5.9.1 [V5-DEVEL] (rgerhards), 2011-06-??
- bugfix: timestamp was incorrectly calculated for timezones with minute
offset
closes: http://bugzilla.adiscon.com/show_bug.cgi?id=271
+- bugfix: problems in failover action handling
+ closes: http://bugzilla.adiscon.com/show_bug.cgi?id=270
+ closes: http://bugzilla.adiscon.com/show_bug.cgi?id=254
+- bugfix: mutex was invalidly left unlocked during action processing
+ At least one case where this can occur is during thread shutdown, which
+ may be initiated by lower activity. In most cases, this is quite
+ unlikely to happen. However, if it does, data structures may be
+ corrupted which could lead to fatal failure and segfault. I detected
+ this via a testbench test, not a user report. But I assume that some
+ users may have had unreproducable aborts that were cause by this bug.
+- bugfix: memory leak in imtcp & subsystems under some circumstances
+ This leak is tied to error conditions which lead to incorrect cleanup
+ of some data structures. [backport from v6]
+- bugfix/improvement:$WorkDirectory now gracefully handles trailing slashes
---------------------------------------------------------------------------
Version 5.9.0 [V5-DEVEL] (rgerhards), 2011-06-08
- imfile: added $InputFileMaxLinesAtOnce directive
@@ -47,9 +61,17 @@ Version 5.9.0 [V5-DEVEL] (rgerhards), 2011-06-08
affected directive was: $ActionExecOnlyWhenPreviousIsSuspended on
closes: http://bugzilla.adiscon.com/show_bug.cgi?id=236
---------------------------------------------------------------------------
-Version 5.8.2 [V5-stable] (rgerhards), 2011-06-??
+Version 5.8.2 [V5-stable] (rgerhards), 2011-06-21
- bugfix: problems in failover action handling
- closes: http://bugzilla.adiscon.com/show_bug.cgi?id=270 (not yet confirmed!)
+ closes: http://bugzilla.adiscon.com/show_bug.cgi?id=270
+ closes: http://bugzilla.adiscon.com/show_bug.cgi?id=254
+- bugfix: mutex was invalidly left unlocked during action processing
+ At least one case where this can occur is during thread shutdown, which
+ may be initiated by lower activity. In most cases, this is quite
+ unlikely to happen. However, if it does, data structures may be
+ corrupted which could lead to fatal failure and segfault. I detected
+ this via a testbench test, not a user report. But I assume that some
+ users may have had unreproducable aborts that were cause by this bug.
- bugfix: memory leak in imtcp & subsystems under some circumstances
This leak is tied to error conditions which lead to incorrect cleanup
of some data structures. [backport from v6]
diff --git a/action.c b/action.c
index b60e5f70..5940fa97 100644
--- a/action.c
+++ b/action.c
@@ -984,11 +984,13 @@ dbgprintf("XXXXX: tryDoAction %p, pnElem %d, nElem %d\n", pAction, *pnElem,
if(localRet == RS_RET_OK) {
/* mark messages as committed */
while(iCommittedUpTo <= i) {
+ pBatch->pElem[iCommittedUpTo].bPrevWasSuspended = 0; /* we had success! */
pBatch->pElem[iCommittedUpTo++].state = BATCH_STATE_COMM;
}
} else if(localRet == RS_RET_PREVIOUS_COMMITTED) {
/* mark messages as committed */
while(iCommittedUpTo < i) {
+ pBatch->pElem[iCommittedUpTo].bPrevWasSuspended = 0; /* we had success! */
pBatch->pElem[iCommittedUpTo++].state = BATCH_STATE_COMM;
}
pBatch->pElem[i].state = BATCH_STATE_SUB;
@@ -1568,6 +1570,9 @@ doQueueEnqObjDirectBatch(action_t *pAction, batch_t *pBatch)
}
if(pBatch->pElem[i].bFilterOK)
bNeedSubmit = 1;
+ DBGPRINTF("action %p[%d]: filterOK:%d state:%d execWhenPrev:%d prevWasSusp:%d\n",
+ pAction, i, pBatch->pElem[i].bFilterOK, pBatch->pElem[i].state,
+ pAction->bExecWhenPrevSusp, pBatch->pElem[i].bPrevWasSuspended);
}
if(bNeedSubmit) {
iRet = qqueueEnqObjDirectBatch(pAction->pQueue, pBatch);
@@ -1576,6 +1581,9 @@ doQueueEnqObjDirectBatch(action_t *pAction, batch_t *pBatch)
}
if(bModifiedFilter) {
for(i = 0 ; i < batchNumMsgs(pBatch) ; ++i) {
+ DBGPRINTF("action %p: filterOK:%d state:%d execWhenPrev:%d prevWasSusp:%d\n",
+ pAction, pBatch->pElem[i].bFilterOK, pBatch->pElem[i].state,
+ pAction->bExecWhenPrevSusp, pBatch->pElem[i].bPrevWasSuspended);
/* note: clang static code analyzer reports a false positive below */
pBatch->pElem[i].bFilterOK = pFilterSave[i];
}
@@ -1599,21 +1607,17 @@ doSubmitToActionQBatch(action_t *pAction, batch_t *pBatch)
int i;
DEFiRet;
- /* TODO
- ich arbeite an dieser funktion, es müssen die verscheidenen modi geprüft werden. ausserdem
- muss geschaut werden, in welche anderen funktionen die neue Funktionalität noch eingebaut
- werden muss, bzw. ob man das an zentralerer stelle machen kann. Am besten die gesamte
- filter evaluation nochmal druchgehen (also das füllen des arrays).
- */
-
DBGPRINTF("Called action(Batch), logging to %s\n", module.GetStateName(pAction->pMod));
if(pAction->pQueue->qType == QUEUETYPE_DIRECT) {
iRet = doQueueEnqObjDirectBatch(pAction, pBatch);
- } else { /* in this case, we do single submits to the queue.
+ } else {/* in this case, we do single submits to the queue.
* TODO: optimize this, we may do at least a multi-submit!
*/
for(i = 0 ; i < batchNumMsgs(pBatch) && !*(pBatch->pbShutdownImmediate) ; ++i) {
+ DBGPRINTF("action %p: filterOK:%d state:%d execWhenPrev:%d prevWasSusp:%d\n",
+ pAction, pBatch->pElem[i].bFilterOK, pBatch->pElem[i].state,
+ pAction->bExecWhenPrevSusp, pBatch->pElem[i].bPrevWasSuspended);
if( pBatch->pElem[i].bFilterOK
&& pBatch->pElem[i].state != BATCH_STATE_DISC
&& (pAction->bExecWhenPrevSusp == 0 || pBatch->pElem[i].bPrevWasSuspended == 1)) {
diff --git a/runtime/queue.c b/runtime/queue.c
index d31a4551..c831836d 100644
--- a/runtime/queue.c
+++ b/runtime/queue.c
@@ -1721,6 +1721,7 @@ static rsRetVal
ConsumerReg(qqueue_t *pThis, wti_t *pWti)
{
int iCancelStateSave;
+ int bNeedReLock = 0; /**< do we need to lock the mutex again? */
DEFiRet;
ISOBJ_TYPE_assert(pThis, qqueue);
@@ -1741,6 +1742,7 @@ ConsumerReg(qqueue_t *pThis, wti_t *pWti)
/* we now have a non-idle batch of work, so we can release the queue mutex and process it */
d_pthread_mutex_unlock(pThis->mut);
+ bNeedReLock = 1;
/* at this spot, we may be cancelled */
pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &iCancelStateSave);
@@ -1760,12 +1762,14 @@ ConsumerReg(qqueue_t *pThis, wti_t *pWti)
/* but now cancellation is no longer permitted */
pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &iCancelStateSave);
- /* now we are done, but need to re-aquire the mutex */
- d_pthread_mutex_lock(pThis->mut);
-
finalize_it:
- dbgprintf("regular consumer finished, iret=%d, szlog %d sz phys %d\n", iRet,
+ DBGPRINTF("regular consumer finished, iret=%d, szlog %d sz phys %d\n", iRet,
getLogicalQueueSize(pThis), getPhysicalQueueSize(pThis));
+
+ /* now we are done, but potentially need to re-aquire the mutex */
+ if(bNeedReLock)
+ d_pthread_mutex_lock(pThis->mut);
+
RETiRet;
}
diff --git a/runtime/rule.c b/runtime/rule.c
index 19239d61..0776e2dc 100644
--- a/runtime/rule.c
+++ b/runtime/rule.c
@@ -279,7 +279,6 @@ processBatch(rule_t *pThis, batch_t *pBatch)
if(localRet != RS_RET_OK) {
DBGPRINTF("processBatch: iRet %d returned from shouldProcessThisMessage, "
"ignoring message\n", localRet);
-
pBatch->pElem[i].bFilterOK = 0;
}
if(pBatch->pElem[i].bFilterOK) {
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 3ee69413..77381d2f 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -18,6 +18,7 @@ TESTS += \
imtcp_conndrop.sh \
imtcp_addtlframedelim.sh \
sndrcv.sh \
+ sndrcv_failover.sh \
sndrcv_gzip.sh \
sndrcv_udp.sh \
sndrcv_udp_nonstdpt.sh \
@@ -52,6 +53,8 @@ TESTS += \
discard-rptdmsg.sh \
discard-allmark.sh \
discard.sh \
+ failover-async.sh \
+ failover-double.sh \
failover-basic.sh \
failover-rptd.sh \
failover-no-rptd.sh \
@@ -98,7 +101,8 @@ endif
if ENABLE_GNUTLS
if HAVE_VALGRIND
-TESTS += manytcp-too-few-tls.sh
+# This test does not work on v5 as we keep DH params
+#TESTS += manytcp-too-few-tls.sh
endif
endif
@@ -285,6 +289,10 @@ EXTRA_DIST= 1.rstest 2.rstest 3.rstest err1.rstest \
failover-basic.sh \
failover-basic-vg.sh \
testsuites/failover-basic.conf \
+ failover-async.sh \
+ testsuites/failover-async.conf \
+ failover-double.sh \
+ testsuites/failover-double.conf \
discard-rptdmsg.sh \
discard-rptdmsg-vg.sh \
testsuites/discard-rptdmsg.conf \
@@ -307,6 +315,10 @@ EXTRA_DIST= 1.rstest 2.rstest 3.rstest err1.rstest \
threadingmqaq.sh \
testsuites/threadingmqaq.conf \
sndrcv_drvr.sh \
+ sndrcv_drvr_noexit.sh \
+ sndrcv_failover.sh \
+ testsuites/sndrcv_failover_sender.conf \
+ testsuites/sndrcv_failover_rcvr.conf \
sndrcv.sh \
testsuites/sndrcv_sender.conf \
testsuites/sndrcv_rcvr.conf \
diff --git a/tests/diag.sh b/tests/diag.sh
index d1242fb1..617118ba 100755
--- a/tests/diag.sh
+++ b/tests/diag.sh
@@ -10,7 +10,7 @@
#valgrind="valgrind --tool=helgrind --log-fd=1"
#valgrind="valgrind --tool=exp-ptrcheck --log-fd=1"
#set -o xtrace
-#export RSYSLOG_DEBUG="debug nologfuncflow nostdout"
+#export RSYSLOG_DEBUG="debug nologfuncflow noprintmutexaction stdout"
#export RSYSLOG_DEBUGLOG="log"
case $1 in
'init') $srcdir/killrsyslog.sh # kill rsyslogd if it runs for some reason
@@ -22,7 +22,7 @@ case $1 in
rm -f work rsyslog.out.log rsyslog2.out.log rsyslog.out.log.save # common work files
rm -rf test-spool test-logdir
rm -f rsyslog.out.*.log work-presort rsyslog.pipe
- rm -f rsyslog.input
+ rm -f rsyslog.input rsyslog.empty
rm -f core.* vgcore.*
mkdir test-spool
;;
@@ -31,7 +31,7 @@ case $1 in
rm -f work rsyslog.out.log rsyslog2.out.log rsyslog.out.log.save # common work files
rm -rf test-spool test-logdir
rm -f rsyslog.out.*.log rsyslog.random.data work-presort rsyslog.pipe
- rm -f rsyslog.input stat-file1
+ rm -f rsyslog.input stat-file1 #rsyslog.empty
echo -------------------------------------------------------------------------------
;;
'startup') # start rsyslogd with default params. $2 is the config file name to use
diff --git a/tests/failover-async.sh b/tests/failover-async.sh
index f17bc0ff..5fc393de 100755
--- a/tests/failover-async.sh
+++ b/tests/failover-async.sh
@@ -3,7 +3,7 @@ echo ===========================================================================
echo \[failover-async.sh\]: async test for failover functionality
source $srcdir/diag.sh init
source $srcdir/diag.sh startup failover-async.conf
-source $srcdir/diag.sh injectmsg 0 5
+source $srcdir/diag.sh injectmsg 0 5000
echo doing shutdown
source $srcdir/diag.sh shutdown-when-empty
echo wait on shutdown
diff --git a/tests/failover-double.sh b/tests/failover-double.sh
new file mode 100755
index 00000000..172b91de
--- /dev/null
+++ b/tests/failover-double.sh
@@ -0,0 +1,12 @@
+# This file is part of the rsyslog project, released under GPLv3
+echo ===============================================================================
+echo \[failover-double.sh\]: test for double failover functionality
+source $srcdir/diag.sh init
+source $srcdir/diag.sh startup failover-double.conf
+source $srcdir/diag.sh injectmsg 0 5000
+echo doing shutdown
+source $srcdir/diag.sh shutdown-when-empty
+echo wait on shutdown
+source $srcdir/diag.sh wait-shutdown
+source $srcdir/diag.sh seq-check 0 4999
+source $srcdir/diag.sh exit
diff --git a/tests/sndrcv_drvr.sh b/tests/sndrcv_drvr.sh
index 3d613069..f9092647 100755
--- a/tests/sndrcv_drvr.sh
+++ b/tests/sndrcv_drvr.sh
@@ -1,50 +1,2 @@
-# This is test driver for testing two rsyslog instances. It can be
-# utilized by any test that just needs two instances with different
-# config files, where messages are injected in instance TWO and
-# (with whatever rsyslog mechanism) being relayed over to instance ONE,
-# where they are written to the log file. After the run, the completeness
-# of that log file is checked.
-# The code is almost the same, but the config files differ (probably greatly)
-# for different test cases. As such, this driver needs to be called with the
-# config file name ($2). From that name, the sender and receiver config file
-# names are automatically generated.
-# So: $1 config file name, $2 number of messages
-#
-# A note on TLS testing: the current testsuite (in git!) already contains
-# TLS test cases. However, getting these test cases correct is not simple.
-# That's not a problem with the code itself, but rater a problem with
-# synchronization in the test environment. So I have deciced to keep the
-# TLS tests in, but not yet actually utilize them. This is most probably
-# left as an excercise for future (devel) releases. -- rgerhards, 2009-11-11
-#
-# added 2009-11-11 by Rgerhards
-# This file is part of the rsyslog project, released under GPLv3
-# uncomment for debugging support:
-source $srcdir/diag.sh init
-# start up the instances
-#export RSYSLOG_DEBUG="debug nostdout noprintmutexaction"
-#export RSYSLOG_DEBUGLOG="log"
-source $srcdir/diag.sh startup $1_rcvr.conf
-source $srcdir/diag.sh wait-startup
-#export RSYSLOG_DEBUGLOG="log2"
-#valgrind="valgrind"
-source $srcdir/diag.sh startup $1_sender.conf 2
-source $srcdir/diag.sh wait-startup 2
-# may be needed by TLS (once we do it): sleep 30
-
-# now inject the messages into instance 2. It will connect to instance 1,
-# and that instance will record the data.
-source $srcdir/diag.sh tcpflood -m$2 -i1
-sleep 2 # make sure all data is received in input buffers
-# shut down sender when everything is sent, receiver continues to run concurrently
-# may be needed by TLS (once we do it): sleep 60
-source $srcdir/diag.sh shutdown-when-empty 2
-source $srcdir/diag.sh wait-shutdown 2
-# now it is time to stop the receiver as well
-source $srcdir/diag.sh shutdown-when-empty
-source $srcdir/diag.sh wait-shutdown
-
-# may be needed by TLS (once we do it): sleep 60
-# do the final check
-source $srcdir/diag.sh seq-check 1 $2
+source $srcdir/sndrcv_drvr_noexit.sh $1 $2
source $srcdir/diag.sh exit
diff --git a/tests/sndrcv_drvr_noexit.sh b/tests/sndrcv_drvr_noexit.sh
new file mode 100755
index 00000000..899eace3
--- /dev/null
+++ b/tests/sndrcv_drvr_noexit.sh
@@ -0,0 +1,49 @@
+# This is test driver for testing two rsyslog instances. It can be
+# utilized by any test that just needs two instances with different
+# config files, where messages are injected in instance TWO and
+# (with whatever rsyslog mechanism) being relayed over to instance ONE,
+# where they are written to the log file. After the run, the completeness
+# of that log file is checked.
+# The code is almost the same, but the config files differ (probably greatly)
+# for different test cases. As such, this driver needs to be called with the
+# config file name ($2). From that name, the sender and receiver config file
+# names are automatically generated.
+# So: $1 config file name, $2 number of messages
+#
+# A note on TLS testing: the current testsuite (in git!) already contains
+# TLS test cases. However, getting these test cases correct is not simple.
+# That's not a problem with the code itself, but rater a problem with
+# synchronization in the test environment. So I have deciced to keep the
+# TLS tests in, but not yet actually utilize them. This is most probably
+# left as an excercise for future (devel) releases. -- rgerhards, 2009-11-11
+#
+# added 2009-11-11 by Rgerhards
+# This file is part of the rsyslog project, released under GPLv3
+# uncomment for debugging support:
+source $srcdir/diag.sh init
+# start up the instances
+#export RSYSLOG_DEBUG="debug nostdout noprintmutexaction"
+#export RSYSLOG_DEBUGLOG="log"
+source $srcdir/diag.sh startup $1_rcvr.conf
+source $srcdir/diag.sh wait-startup
+#export RSYSLOG_DEBUGLOG="log2"
+#valgrind="valgrind"
+source $srcdir/diag.sh startup $1_sender.conf 2
+source $srcdir/diag.sh wait-startup 2
+# may be needed by TLS (once we do it): sleep 30
+
+# now inject the messages into instance 2. It will connect to instance 1,
+# and that instance will record the data.
+source $srcdir/diag.sh tcpflood -m$2 -i1
+sleep 2 # make sure all data is received in input buffers
+# shut down sender when everything is sent, receiver continues to run concurrently
+# may be needed by TLS (once we do it): sleep 60
+source $srcdir/diag.sh shutdown-when-empty 2
+source $srcdir/diag.sh wait-shutdown 2
+# now it is time to stop the receiver as well
+source $srcdir/diag.sh shutdown-when-empty
+source $srcdir/diag.sh wait-shutdown
+
+# may be needed by TLS (once we do it): sleep 60
+# do the final check
+source $srcdir/diag.sh seq-check 1 $2
diff --git a/tests/sndrcv_failover.sh b/tests/sndrcv_failover.sh
new file mode 100755
index 00000000..4c5e1831
--- /dev/null
+++ b/tests/sndrcv_failover.sh
@@ -0,0 +1,21 @@
+# This tests failover capabilities. Data is sent to local port 13516, where
+# no process shall listen. Then it fails over to a second instance, then to
+# a file. The second instance is started. So all data should be received
+# there and none be logged to the file.
+# This builds on the basic sndrcv.sh test, but adds a first, failing,
+# location to the conf file.
+# added 2011-06-20 by Rgerhards
+# This file is part of the rsyslog project, released under GPLv3
+echo ===============================================================================
+echo \[sndrcv_failover.sh\]: testing failover capabilities for tcp sending
+source $srcdir/sndrcv_drvr_noexit.sh sndrcv_failover 50000
+ls -l rsyslog.empty
+if [[ -s rsyslog.empty ]] ; then
+ echo "FAIL: rsyslog.empty has data. Failover handling failed. Data is written"
+ echo " even though the previous action (in a failover chain!) properly"
+ echo " worked."
+ exit 1
+else
+ echo "rsyslog.empty is empty - OK"
+fi ;
+source $srcdir/diag.sh exit
diff --git a/tests/testsuites/failover-double.conf b/tests/testsuites/failover-double.conf
new file mode 100644
index 00000000..a9991321
--- /dev/null
+++ b/tests/testsuites/failover-double.conf
@@ -0,0 +1,9 @@
+$IncludeConfig diag-common.conf
+
+$template outfmt,"%msg:F,58:2%\n"
+
+:msg, contains, "msgnum:" @@127.0.0.1:13516
+$ActionExecOnlyWhenPreviousIsSuspended on
+& @@127.0.0.1:1234
+& ./rsyslog.out.log;outfmt
+$ActionExecOnlyWhenPreviousIsSuspended off
diff --git a/tests/testsuites/sndrcv_failover_rcvr.conf b/tests/testsuites/sndrcv_failover_rcvr.conf
new file mode 100644
index 00000000..6f7ce34b
--- /dev/null
+++ b/tests/testsuites/sndrcv_failover_rcvr.conf
@@ -0,0 +1,11 @@
+# see equally-named shell file for details
+# rgerhards, 2009-11-11
+$IncludeConfig diag-common.conf
+
+$ModLoad ../plugins/imtcp/.libs/imtcp
+# then SENDER sends to this port (not tcpflood!)
+$InputTCPServerRun 13515
+
+$template outfmt,"%msg:F,58:2%\n"
+$template dynfile,"rsyslog.out.log" # trick to use relative path names!
+:msg, contains, "msgnum:" ?dynfile;outfmt
diff --git a/tests/testsuites/sndrcv_failover_sender.conf b/tests/testsuites/sndrcv_failover_sender.conf
new file mode 100644
index 00000000..b8e7c186
--- /dev/null
+++ b/tests/testsuites/sndrcv_failover_sender.conf
@@ -0,0 +1,13 @@
+# see tcpsndrcv.sh for details
+# rgerhards, 2009-11-11
+$IncludeConfig diag-common2.conf
+
+$ModLoad ../plugins/imtcp/.libs/imtcp
+# this listener is for message generation by the test framework!
+$InputTCPServerRun 13514
+
+*.* @@127.0.0.1:13516 # this must be DEAD
+$ActionExecOnlyWhenPreviousIsSuspended on
+& @@127.0.0.1:13515
+& ./rsyslog.empty
+$ActionExecOnlyWhenPreviousIsSuspended off