From cf8b0717e12330695ce2e4b16966fda2a64b053e Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Thu, 18 Sep 2008 10:48:19 +0200 Subject: ignoring an (acceptable) race in debug system --- runtime/debug.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/runtime/debug.c b/runtime/debug.c index 1450d029..7ed1442b 100644 --- a/runtime/debug.c +++ b/runtime/debug.c @@ -480,7 +480,23 @@ static inline void dbgMutexUnlockLog(pthread_mutex_t *pmut, dbgFuncDB_t *pFuncDB pthread_mutex_lock(&mutMutLog); pLog = dbgMutLogFindSpecific(pmut, MUTOP_LOCK, NULL); +#if 0 /* toggle for testing */ assert(pLog != NULL); +#else +/* the change below seems not to work - the problem seems to be a real race... I keep this code in just in case + * I need to re-use it. It should be removed once we are finished analyzing this problem. -- rgerhards, 2008-09-17 + */ +if(pLog == NULL) { + /* this may happen due to some races. We do not try to avoid + * this, as it would complicate the "real" code. This is not justified + * just to keep the debug info system up. -- rgerhards, 2008-09-17 + */ + pthread_mutex_unlock(&mutMutLog); + dbgprintf("%s:%d:%s: mutex %p UNlocked [but we did not yet know this mutex!]\n", + pFuncDB->file, unlockLn, pFuncDB->func, (void*)pmut); + return; /* if we don't know it yet, we can not clean up... */ +} +#endif /* we found the last lock entry. We now need to see from which FuncDB we need to * remove it. This is recorded inside the mutex log entry. -- cgit From 988989e49ef8639123c83383ba256c4e67679c8d Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Thu, 18 Sep 2008 10:49:16 +0200 Subject: re-enabled gcc builtin atomic operations and added a proper ./configure check --- ChangeLog | 2 ++ configure.ac | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ runtime/atomic.h | 18 +++++++++--------- 3 files changed, 61 insertions(+), 9 deletions(-) diff --git a/ChangeLog b/ChangeLog index 1995ca10..125b0ada 100644 --- a/ChangeLog +++ b/ChangeLog @@ -11,6 +11,8 @@ Version 3.21.5 [DEVEL] (rgerhards), 2008-09-?? each input module (else it is blank). - added system property "$myhostname", which contains the name of the local host as it knows itself. +- re-enabled gcc builtin atomic operations and added a proper + ./configure check --------------------------------------------------------------------------- Version 3.21.4 [DEVEL] (rgerhards), 2008-09-04 - removed compile time fixed message size limit (was 2K), limit can now diff --git a/configure.ac b/configure.ac index 8e9f7807..fea7c063 100644 --- a/configure.ac +++ b/configure.ac @@ -107,6 +107,56 @@ AC_TRY_COMPILE([ AC_MSG_RESULT(no; defined as 64) ) +# check for availability of atomic operations +# rgerhards, 2008-09-18, added based on +# http://svn.apache.org/repos/asf/apr/apr/trunk/configure.in + +AC_CACHE_CHECK([whether the compiler provides atomic builtins], [ap_cv_atomic_builtins], +[AC_TRY_RUN([ +int main() +{ + unsigned long val = 1010, tmp, *mem = &val; + + if (__sync_fetch_and_add(&val, 1010) != 1010 || val != 2020) + return 1; + + tmp = val; + + if (__sync_fetch_and_sub(mem, 1010) != tmp || val != 1010) + return 1; + + if (__sync_sub_and_fetch(&val, 1010) != 0 || val != 0) + return 1; + + tmp = 3030; + + if (__sync_val_compare_and_swap(mem, 0, tmp) != 0 || val != tmp) + return 1; + + if (__sync_lock_test_and_set(&val, 4040) != 3030) + return 1; + + mem = &tmp; + + if (__sync_val_compare_and_swap(&mem, &tmp, &val) != &tmp) + return 1; + + __sync_synchronize(); + + if (mem != &val) + return 1; + + return 0; +}], [ap_cv_atomic_builtins=yes], [ap_cv_atomic_builtins=no], [ap_cv_atomic_builtins=no])]) + +if test "$ap_cv_atomic_builtins" = "yes"; then + AC_DEFINE(HAVE_ATOMIC_BUILTINS, 1, [Define if compiler provides atomic builtins]) +fi + + + + + # Large file support AC_ARG_ENABLE(largefile, [AS_HELP_STRING([--enable-largefile],[Enable large file support @<:@default=yes@:>@])], diff --git a/runtime/atomic.h b/runtime/atomic.h index 430ae7f0..d6811628 100644 --- a/runtime/atomic.h +++ b/runtime/atomic.h @@ -1,6 +1,6 @@ /* This header supplies atomic operations. So far, we rely on GCC's - * atomic builtins. I have no idea if we can check them via autotools, - * but I am making the necessary provisioning to live without them if + * atomic builtins. During configure, we check if atomic operatons are + * available. If they are not, I am making the necessary provisioning to live without them if * they are not available. Please note that you should only use the macros * here if you think you can actually live WITHOUT an explicit atomic operation, * because in the non-presence of them, we simply do it without atomicitiy. @@ -36,16 +36,16 @@ #ifndef INCLUDED_ATOMIC_H #define INCLUDED_ATOMIC_H -/* set the following to 1 if we have atomic operations (and #undef it otherwise) */ -/* #define DO_HAVE_ATOMICS 1 */ /* for this release, we disable atomic calls because there seem to be some * portability problems and we can not fix that without destabilizing the build. * They simply came in too late. -- rgerhards, 2008-04-02 */ -/* make sure they are not used! -#define ATOMIC_INC(data) ((void) __sync_fetch_and_add(&data, 1)) -#define ATOMIC_DEC_AND_FETCH(data) __sync_sub_and_fetch(&data, 1) -*/ -#define ATOMIC_INC(data) (++(data)) +#ifdef HAVE_ATOMIC_BUILTINS +# define ATOMIC_INC(data) ((void) __sync_fetch_and_add(&data, 1)) +# define ATOMIC_DEC_AND_FETCH(data) __sync_sub_and_fetch(&data, 1) +#else +# warning "atomic builtins not available, using nul operations" +# define ATOMIC_INC(data) (++(data)) +#endif #endif /* #ifndef INCLUDED_ATOMIC_H */ -- cgit From 4c96ebdcfe075e80810b01257cf21ea1c9b3ec0e Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Thu, 18 Sep 2008 12:19:33 +0200 Subject: bugfix: potential race condition when adding messages to queue There was a wrong order of mutex lock operations. It is hard to believe that really caused problems, but in theory it could and with threading we often see that theory becomes practice if something is only used long enough on a fast enough machine with enough CPUs ;) --- ChangeLog | 5 +++++ runtime/atomic.h | 8 ++++++-- runtime/debug.h | 3 ++- runtime/queue.c | 12 ++++++------ tools/syslogd.c | 7 +++---- 5 files changed, 22 insertions(+), 13 deletions(-) diff --git a/ChangeLog b/ChangeLog index 125b0ada..23ac95fb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -13,6 +13,11 @@ Version 3.21.5 [DEVEL] (rgerhards), 2008-09-?? local host as it knows itself. - re-enabled gcc builtin atomic operations and added a proper ./configure check +- bugfix: potential race condition when adding messages to queue + There was a wrong order of mutex lock operations. It is hard to + believe that really caused problems, but in theory it could and with + threading we often see that theory becomes practice if something is only + used long enough on a fast enough machine with enough CPUs ;) --------------------------------------------------------------------------- Version 3.21.4 [DEVEL] (rgerhards), 2008-09-04 - removed compile time fixed message size limit (was 2K), limit can now diff --git a/runtime/atomic.h b/runtime/atomic.h index d6811628..d15f78ee 100644 --- a/runtime/atomic.h +++ b/runtime/atomic.h @@ -41,11 +41,15 @@ * They simply came in too late. -- rgerhards, 2008-04-02 */ #ifdef HAVE_ATOMIC_BUILTINS -# define ATOMIC_INC(data) ((void) __sync_fetch_and_add(&data, 1)) -# define ATOMIC_DEC_AND_FETCH(data) __sync_sub_and_fetch(&data, 1) +# define ATOMIC_INC(data) ((void) __sync_fetch_and_add(&(data), 1)) +# define ATOMIC_DEC_AND_FETCH(data) __sync_sub_and_fetch(&(data), 1) +# define ATOMIC_FETCH_32BIT(data) ((unsigned) __sync_fetch_and_and(&(data), 0xffffffff)) +# define ATOMIC_STORE_1_TO_32BIT(data) __sync_lock_test_and_set(&(data), 1) #else # warning "atomic builtins not available, using nul operations" # define ATOMIC_INC(data) (++(data)) +# define ATOMIC_FETCH_32BIT(data) (data) +# define ATOMIC_STORE_1_TO_32BIT(data) (data) = 1 #endif #endif /* #ifndef INCLUDED_ATOMIC_H */ diff --git a/runtime/debug.h b/runtime/debug.h index 214b7c05..d9d576b5 100644 --- a/runtime/debug.h +++ b/runtime/debug.h @@ -130,7 +130,8 @@ void dbgPrintAllDebugInfo(void); /* debug aides */ -#ifdef RTINST +//#ifdef RTINST +#if 0 // temporarily removed for helgrind #define d_pthread_mutex_lock(x) dbgMutexLock(x, pdbgFuncDB, __LINE__, dbgCALLStaCK_POP_POINT ) #define d_pthread_mutex_unlock(x) dbgMutexUnlock(x, pdbgFuncDB, __LINE__, dbgCALLStaCK_POP_POINT ) #define d_pthread_cond_wait(cond, mut) dbgCondWait(cond, mut, pdbgFuncDB, __LINE__, dbgCALLStaCK_POP_POINT ) diff --git a/runtime/queue.c b/runtime/queue.c index 7e7d4152..c0a37019 100644 --- a/runtime/queue.c +++ b/runtime/queue.c @@ -2171,17 +2171,17 @@ queueEnqObj(queue_t *pThis, flowControl_t flowCtlType, void *pUsr) finalize_it: if(pThis->qType != QUEUETYPE_DIRECT) { - d_pthread_mutex_unlock(pThis->mut); + /* make sure at least one worker is running. */ + if(pThis->qType != QUEUETYPE_DIRECT) { + queueAdviseMaxWorkers(pThis); + } + /* and release the mutex */ i = pthread_cond_signal(&pThis->notEmpty); + d_pthread_mutex_unlock(pThis->mut); dbgoprint((obj_t*) pThis, "EnqueueMsg signaled condition (%d)\n", i); pthread_setcancelstate(iCancelStateSave, NULL); } - /* make sure at least one worker is running. */ - if(pThis->qType != QUEUETYPE_DIRECT) { - queueAdviseMaxWorkers(pThis); - } - RETiRet; } diff --git a/tools/syslogd.c b/tools/syslogd.c index b6e1d826..3a637dd8 100644 --- a/tools/syslogd.c +++ b/tools/syslogd.c @@ -2832,9 +2832,8 @@ static rsRetVal mainThread() CHKiRet(init()); - if(Debug) { + if(Debug && debugging_on) { dbgprintf("Debugging enabled, SIGUSR1 to turn off debugging.\n"); - debugging_on = 1; } /* Send a signal to the parent so it can terminate. */ @@ -3082,9 +3081,9 @@ doGlblProcessInit(void) fputs(" Already running.\n", stderr); exit(1); /* "good" exit, done if syslogd is already running */ } - } - else + } else { debugging_on = 1; + } /* tuck my process id away */ dbgprintf("Writing pidfile %s.\n", PidFile); -- cgit