summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog19
-rw-r--r--action.c28
-rw-r--r--action.h3
-rwxr-xr-xtests/pipe_noreader.sh9
4 files changed, 47 insertions, 12 deletions
diff --git a/ChangeLog b/ChangeLog
index 98ca6bbd..a24a18ba 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,11 +1,12 @@
---------------------------------------------------------------------------
Version 5.4.2 [v5-stable] (rgerhards), 2010-03-??
-- bugfix(minor): status variable was uninitialized
- However, this would have caused harm only if NO parser modules at
- all were loaded, which would lead to a defunctional configuration
- at all. And, even more important, this is impossible as two parser
- modules are built-in and thus can not be "not loaded", so we always
- have a minimum of two.
+- bugfix(kind of): output plugin retry behaviour could cause engine to loop
+ The rsyslog engine did not guard itself against output modules that do
+ not properly convey back the tryResume() behaviour. This then leads to
+ what looks like an endless loop. I consider this to be a bug of the
+ engine not only because it should be hardened against plugin misbehaviour,
+ but also because plugins may not be totally able to avoid this situation
+ (depending on the type of and processing done by the plugin).
- bugfix: testbench failed when not executed in UTC+1 timezone
accidently, the time zone information was kept inside some
to-be-checked-for responses
@@ -13,6 +14,12 @@ Version 5.4.2 [v5-stable] (rgerhards), 2010-03-??
message-induced off-by-one error (potential segfault) (see 4.6.2)
The analysis has been completed and a better fix been crafted and
integrated.
+- bugfix(minor): status variable was uninitialized
+ However, this would have caused harm only if NO parser modules at
+ all were loaded, which would lead to a defunctional configuration
+ at all. And, even more important, this is impossible as two parser
+ modules are built-in and thus can not be "not loaded", so we always
+ have a minimum of two.
---------------------------------------------------------------------------
Version 5.4.1 [v5-stable] (rgerhards), 2010-03-??
- added new property replacer option "date-rfc3164-buggyday" primarily
diff --git a/action.c b/action.c
index 65465785..cff2680b 100644
--- a/action.c
+++ b/action.c
@@ -445,6 +445,7 @@ static void actionCommitted(action_t *pThis)
static void actionRetry(action_t *pThis)
{
actionSetState(pThis, ACT_STATE_RTRY);
+ pThis->iResumeOKinRow++;
}
@@ -480,23 +481,39 @@ static inline void actionSuspend(action_t *pThis, time_t ttNow)
/* actually do retry processing. Note that the function receives a timestamp so
* that we do not need to call the (expensive) time() API.
* Note that we do the full retry processing here, doing the configured number of
- * iterations.
- * rgerhards, 2009-05-07
+ * iterations. -- rgerhards, 2009-05-07
+ * We need to guard against module which always return RS_RET_OK from their tryResume()
+ * entry point. This is invalid, but has harsh consequences: it will cause the rsyslog
+ * engine to go into a tight loop. That obviously is not acceptable. As such, we track the
+ * count of iterations that a tryResume returning RS_RET_OK is immediately followed by
+ * an unsuccessful call to doAction(). If that happens more than 1,000 times, we assume
+ * the return acutally is a RS_RET_SUSPENDED. In order to go through the various
+ * resumption stages, we do this for every 1000 requests. This magic number 1000 may
+ * not be the most appropriate, but it should be thought of a "if nothing else helps"
+ * kind of facility: in the first place, the module should return a proper indication
+ * of its inability to recover. -- rgerhards, 2010-04-26.
*/
static rsRetVal actionDoRetry(action_t *pThis, time_t ttNow)
{
int iRetries;
int iSleepPeriod;
+ int bTreatOKasSusp;
DEFiRet;
ASSERT(pThis != NULL);
iRetries = 0;
while(pThis->eState == ACT_STATE_RTRY) {
+dbgprintf("YYY: resume in row %d\n", pThis->iResumeOKinRow);
iRet = pThis->pMod->tryResume(pThis->pModData);
- if(iRet == RS_RET_OK) {
+ if((pThis->iResumeOKinRow > 999) && (pThis->iResumeOKinRow % 1000 == 0)) {
+ bTreatOKasSusp = 1;
+ } else {
+ bTreatOKasSusp = 0;
+ }
+ if((iRet == RS_RET_OK) && (!bTreatOKasSusp)) {
actionSetState(pThis, ACT_STATE_RDY);
- } else if(iRet == RS_RET_SUSPENDED) {
+ } else if(iRet == RS_RET_SUSPENDED || bTreatOKasSusp) {
/* max retries reached? */
if((pThis->iResumeRetryCount != -1 && iRetries >= pThis->iResumeRetryCount)) {
actionSuspend(pThis, ttNow);
@@ -715,13 +732,16 @@ actionCallDoAction(action_t *pThis, msg_t *pMsg)
switch(iRet) {
case RS_RET_OK:
actionCommitted(pThis);
+ pThis->iResumeOKinRow = 0; /* we had a successful call! */
break;
case RS_RET_DEFER_COMMIT:
+ pThis->iResumeOKinRow = 0; /* we had a successful call! */
/* we are done, action state remains the same */
break;
case RS_RET_PREVIOUS_COMMITTED:
/* action state remains the same, but we had a commit. */
pThis->bHadAutoCommit = 1;
+ pThis->iResumeOKinRow = 0; /* we had a successful call! */
break;
case RS_RET_SUSPENDED:
actionRetry(pThis);
diff --git a/action.h b/action.h
index 6cc4df5c..4a6c3c88 100644
--- a/action.h
+++ b/action.h
@@ -56,8 +56,9 @@ struct action_s {
bool bWriteAllMarkMsgs;/* should all mark msgs be written (not matter how recent the action was executed)? */
int iSecsExecOnceInterval; /* if non-zero, minimum seconds to wait until action is executed again */
action_state_t eState; /* current state of action */
- int bHadAutoCommit; /* did an auto-commit happen during doAction()? */
+ bool bHadAutoCommit; /* did an auto-commit happen during doAction()? */
time_t ttResumeRtry; /* when is it time to retry the resume? */
+ int iResumeOKinRow; /* number of times in a row that resume said OK with an immediate failure following */
int iResumeInterval;/* resume interval for this action */
int iResumeRetryCount;/* how often shall we retry a suspended action? (-1 --> eternal) */
int iNbrResRtry; /* number of retries since last suspend */
diff --git a/tests/pipe_noreader.sh b/tests/pipe_noreader.sh
index c80aaf3f..8dea0818 100755
--- a/tests/pipe_noreader.sh
+++ b/tests/pipe_noreader.sh
@@ -4,6 +4,12 @@
# as a permanent testcase. For some details, please see bug tracker
# http://bugzilla.adiscon.com/show_bug.cgi?id=186
#
+# IMPORTANT: we do NOT check any result message set. The whole point in
+# this test is to verify that we do NOT run into an eternal loop. As such,
+# the test is "PASS", if rsyslogd terminates. If it does not terminate, we
+# obviously do not cause "FAIL", but processing will hang, which should be
+# a good-enough indication of failure.
+#
# added 2010-04-26 by Rgerhards
# This file is part of the rsyslog project, released under GPLv3
echo ===============================================================================
@@ -18,5 +24,6 @@ source $srcdir/diag.sh startup pipe_noreader.conf
source $srcdir/diag.sh tcpflood -m1000 -d500
source $srcdir/diag.sh shutdown-when-empty # shut down rsyslogd when done processing messages
source $srcdir/diag.sh wait-shutdown # and wait for it to terminate
-source $srcdir/diag.sh seq-check 0 999
+# NO need to check seqno -- see header comment
+echo we did not loop, so the test is sucessfull
source $srcdir/diag.sh exit