From 1fb45d3e993e44e1595fc54f1ad3b786c66fbb4c Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Thu, 18 Mar 2010 12:34:26 +0100 Subject: some cleanup, some additional comments and a bit more debug output --- runtime/stream.c | 13 +++++++++++-- tests/asynwr_deadlock.sh | 4 ++-- tests/asynwr_deadlock2.sh | 16 ++++++++++------ tests/testsuites/asynwr_deadlock.conf | 8 -------- tests/testsuites/gzipwr_large_dynfile.conf | 2 +- 5 files changed, 24 insertions(+), 19 deletions(-) diff --git a/runtime/stream.c b/runtime/stream.c index 87daedaf..9a0a8615 100644 --- a/runtime/stream.c +++ b/runtime/stream.c @@ -316,7 +316,8 @@ static rsRetVal strmCloseFile(strm_t *pThis) DEFiRet; ASSERT(pThis != NULL); - DBGOPRINT((obj_t*) pThis, "file %d closing\n", pThis->fd); + DBGOPRINT((obj_t*) pThis, "file %d(%s) closing\n", pThis->fd, + (pThis->pszFName == NULL) ? "N/A" : (char*)pThis->pszFName); if(pThis->tOperationsMode != STREAMMODE_READ) { strmFlush(pThis); @@ -929,6 +930,7 @@ asyncWriterThread(void *pPtr) while(1) { /* loop broken inside */ d_pthread_mutex_lock(&pThis->mut); +dbgprintf("XXX: asyncWriterThread iterating %s\n", pThis->pszFName); while(pThis->iCnt == 0) { if(pThis->bStopWriter) { pthread_cond_broadcast(&pThis->isEmpty); @@ -944,6 +946,7 @@ asyncWriterThread(void *pPtr) bTimedOut = 0; timeoutComp(&t, pThis->iFlushInterval * 2000); /* *1000 millisconds */ if(pThis->bDoTimedWait) { +dbgprintf("asyncWriter thread going to timeout sleep\n"); if(pthread_cond_timedwait(&pThis->notEmpty, &pThis->mut, &t) != 0) { int err = errno; if(err == ETIMEDOUT) { @@ -957,13 +960,16 @@ asyncWriterThread(void *pPtr) } } } else { +dbgprintf("asyncWriter thread going to eternal sleep\n"); d_pthread_cond_wait(&pThis->notEmpty, &pThis->mut); } +dbgprintf("asyncWriter woke up\n"); } bTimedOut = 0; /* we may have timed out, but there *is* work to do... */ iDeq = pThis->iDeq++ % STREAM_ASYNC_NUMBUFS; +dbgprintf("asyncWriter writes data\n"); doWriteInternal(pThis, pThis->asyncBuf[iDeq].pBuf, pThis->asyncBuf[iDeq].lenBuf); // TODO: error check????? 2009-07-06 @@ -1135,7 +1141,10 @@ strmFlush(strm_t *pThis) DEFiRet; ASSERT(pThis != NULL); - DBGOPRINT((obj_t*) pThis, "file %d flush, buflen %ld\n", pThis->fd, (long) pThis->iBufPtr); + DBGOPRINT((obj_t*) pThis, "file %d(%s) flush, buflen %ld\n", pThis->fd, pThis->pszFName, (long) pThis->iBufPtr); + DBGOPRINT((obj_t*) pThis, "file %d(%s) flush, buflen %ld%s\n", pThis->fd, + (pThis->pszFName == NULL) ? "N/A" : (char*)pThis->pszFName, + (long) pThis->iBufPtr, (pThis->iBufPtr == 0) ? " (no need to flush)" : ""); if(pThis->tOperationsMode != STREAMMODE_READ && pThis->iBufPtr > 0) { iRet = strmSchedWrite(pThis, pThis->pIOBuf, pThis->iBufPtr); diff --git a/tests/asynwr_deadlock.sh b/tests/asynwr_deadlock.sh index 2d08419c..dc08355e 100755 --- a/tests/asynwr_deadlock.sh +++ b/tests/asynwr_deadlock.sh @@ -8,8 +8,8 @@ echo =========================================================================== echo TEST: \[asynwr_deadlock.sh\]: a case known to have caused a deadlock in the past source $srcdir/diag.sh init # uncomment for debugging support: -export RSYSLOG_DEBUG="debug nostdout noprintmutexaction" -export RSYSLOG_DEBUGLOG="log" +#export RSYSLOG_DEBUG="debug nostdout noprintmutexaction" +#export RSYSLOG_DEBUGLOG="log" source $srcdir/diag.sh startup asynwr_deadlock.conf # just send one message source $srcdir/diag.sh tcpflood -m1 diff --git a/tests/asynwr_deadlock2.sh b/tests/asynwr_deadlock2.sh index 3b429b9a..34d37d48 100755 --- a/tests/asynwr_deadlock2.sh +++ b/tests/asynwr_deadlock2.sh @@ -20,10 +20,14 @@ # cirumstances. As far as the analysis goes, the following need to happen: # 1. buffers on that file are being flushed # 2. no new data arrives -# 3. the inactivity timeout has not yet happend +# 3. the inactivity timeout has not yet expired # 4. *then* (and only then) the stream is closed or destructed -# In that, 1 to 3 are prequisites for the deadlock which will happen in 4. In order -# to create this case reliably, I have used the "$OMFileFlushOnTXEnd on" directive +# In that, 1 to 4 are prequisites for the deadlock which will happen in 4. However, +# for it to happen, we also need the right "timing". There is a race between the +# main thread and the async writer thread. The deadlock will only happen under +# the "right" circumstances, which basically means it will not happen always. +# In order to create this case as reliable as possible, I have used +# the "$OMFileFlushOnTXEnd on" directive # inside my test case. It makes sure that #1 above happens. The test uses a dynafile # cache size of 4, and the load generator generates data for 5 different dynafiles. # So over time, we will hit a spot where 4 dynafiles are open and the 5th file name @@ -50,8 +54,8 @@ echo =========================================================================== echo TEST: \[asynwr_deadlock2.sh\]: a case known to have caused a deadlock in the past source $srcdir/diag.sh init # uncomment for debugging support: -export RSYSLOG_DEBUG="debug nostdout noprintmutexaction" -export RSYSLOG_DEBUGLOG="log" +#export RSYSLOG_DEBUG="debug nostdout noprintmutexaction" +#export RSYSLOG_DEBUGLOG="log" source $srcdir/diag.sh startup asynwr_deadlock2.conf # send 20000 messages, each close to 2K (non-randomized!), so that we can fill # the buffers and hopefully run into the "deadlock". @@ -59,6 +63,6 @@ source $srcdir/diag.sh tcpflood -m20000 -d1800 -P129 -i1 -f5 # sleep is important! need to make sure the instance is inactive source $srcdir/diag.sh shutdown-when-empty # shut down rsyslogd when done processing messages source $srcdir/diag.sh wait-shutdown # and wait for it to terminate -zcat rsyslog.out.*.log > rsyslog.out.log +cat rsyslog.out.*.log > rsyslog.out.log source $srcdir/diag.sh seq-check 1 20000 -E source $srcdir/diag.sh exit diff --git a/tests/testsuites/asynwr_deadlock.conf b/tests/testsuites/asynwr_deadlock.conf index b3a052df..d703791a 100644 --- a/tests/testsuites/asynwr_deadlock.conf +++ b/tests/testsuites/asynwr_deadlock.conf @@ -1,4 +1,3 @@ -# simple async writing test # rgerhards, 2010-03-09 $IncludeConfig diag-common.conf @@ -7,15 +6,8 @@ $MainMsgQueueTimeoutShutdown 10000 $InputTCPServerRun 13514 $template outfmt,"%msg:F,58:2%\n" -$template dynfile,"rsyslog.out.log" # trick to use relative path names! $OMFileFlushOnTXEnd on $OMFileFlushInterval 10 $OMFileFlushIOBufferSize 10k -#:msg, contains, "msgnum:" ?dynfile;outfmt :msg, contains, "msgnum:" ./rsyslog.out.log;outfmt - -$OMFileFlushOnTXEnd on -$OMFileFlushInterval 10 -$OMFileFlushIOBufferSize 10k -#:msg, contains, "msgnum:" ./rsyslog.out.2.log;outfmt diff --git a/tests/testsuites/gzipwr_large_dynfile.conf b/tests/testsuites/gzipwr_large_dynfile.conf index 31bcb978..3a1b255a 100644 --- a/tests/testsuites/gzipwr_large_dynfile.conf +++ b/tests/testsuites/gzipwr_large_dynfile.conf @@ -8,7 +8,7 @@ $MainMsgQueueTimeoutShutdown 10000 $InputTCPServerRun 13514 $template outfmt,"%msg:F,58:3%,%msg:F,58:4%,%msg:F,58:5%\n" -$template dynfile,"rsyslog.out.%msg:F,58:2%.log" # trick to use relative path names! +$template dynfile,"rsyslog.out.%msg:F,58:2%.log" # use multiple dynafiles $OMFileFlushOnTXEnd off $OMFileZipLevel 6 $OMFileIOBufferSize 256k -- cgit