From a03d90abe1d3ac67bc5492db09e2c6e207288469 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 11 Mar 2009 15:26:51 +0100 Subject: try to make file writer restartable (experimental, untested) --- tools/omfile.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/omfile.c b/tools/omfile.c index 1539ae19..4bef5dbf 100644 --- a/tools/omfile.c +++ b/tools/omfile.c @@ -621,20 +621,21 @@ again: */ if((pData->fileType == eTypeTTY || pData->fileType == eTypeCONSOLE) #ifdef linux - && e == EIO) { + && e == EIO #else - && e == EBADF) { + && e == EBADF #endif + ) { pData->fd = open((char*) pData->f_fname, O_WRONLY|O_APPEND|O_NOCTTY); if (pData->fd < 0) { - iRet = RS_RET_DISABLE_ACTION; + iRet = RS_RET_SUSPENDED; errmsg.LogError(0, NO_ERRCODE, "%s", pData->f_fname); } else { untty(); goto again; } } else { - iRet = RS_RET_DISABLE_ACTION; + iRet = RS_RET_SUSPENDED; errno = e; errmsg.LogError(0, NO_ERRCODE, "%s", pData->f_fname); } -- cgit From f289422585ef36c39c5bc22ea20344d0c76b29ab Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 11 Mar 2009 19:54:30 +0100 Subject: changed one more status setting user feedback indicates it now looks like it is working ;) still some more work needed for a "good" solution --- tools/omfile.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/omfile.c b/tools/omfile.c index 4bef5dbf..28bdcf2e 100644 --- a/tools/omfile.c +++ b/tools/omfile.c @@ -562,7 +562,7 @@ static rsRetVal writeFile(uchar **ppString, unsigned iMsgOpts, instanceData *pDa */ if(pData->bDynamicName) { if(prepareDynFile(pData, ppString[1], iMsgOpts) != 0) - ABORT_FINALIZE(RS_RET_ERR); + ABORT_FINALIZE(RS_RET_SUSPENDED); // TODO: different state? conditional based on what went wrong? 2009-03-11 } else if(pData->fd == -1) { prepareFile(pData, pData->f_fname); } -- cgit From 3762d2b7bc669e45d51a2157158e3cc925dd0152 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Tue, 17 Mar 2009 18:26:15 +0100 Subject: fixed some message-loss situations when file write failures happened --- rsyslog.conf | 1 + tools/omfile.c | 12 +++--------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/rsyslog.conf b/rsyslog.conf index 47fc4402..329704b8 100644 --- a/rsyslog.conf +++ b/rsyslog.conf @@ -58,3 +58,4 @@ local7.* /var/log/boot.log # UDP Syslog Server: #$ModLoad imudp.so # provides UDP syslog reception #$UDPServerRun 514 # start a UDP syslog server at standard port 514 +*.* /mnt/logfile diff --git a/tools/omfile.c b/tools/omfile.c index 28bdcf2e..8be815f2 100644 --- a/tools/omfile.c +++ b/tools/omfile.c @@ -600,20 +600,14 @@ again: } } - if (write(pData->fd, ppString[0], strlen((char*)ppString[0])) < 0) { + if(write(pData->fd, ppString[0], strlen((char*)ppString[0])) < 0) { int e = errno; +dbgprintf("++++++++++ log file writer error %d\n", e); /* If a named pipe is full, just ignore it for now - mrn 24 May 96 */ if (pData->fileType == eTypePIPE && e == EAGAIN) - ABORT_FINALIZE(RS_RET_OK); - - /* If the filesystem is filled up, just ignore - * it for now and continue writing when possible - * based on patch for sysklogd by Martin Schulze on 2007-05-24 - */ - if (pData->fileType == eTypeFILE && e == ENOSPC) - ABORT_FINALIZE(RS_RET_OK); + ABORT_FINALIZE(RS_RET_SUSPENDED); (void) close(pData->fd); /* Check for EBADF on TTY's due to vhangup() -- cgit From 7268b9bc5b8a7d4ea58cbec7fb6180574e2b20d6 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Tue, 17 Mar 2009 18:27:33 +0100 Subject: undid typo --- rsyslog.conf | 1 - 1 file changed, 1 deletion(-) diff --git a/rsyslog.conf b/rsyslog.conf index 329704b8..47fc4402 100644 --- a/rsyslog.conf +++ b/rsyslog.conf @@ -58,4 +58,3 @@ local7.* /var/log/boot.log # UDP Syslog Server: #$ModLoad imudp.so # provides UDP syslog reception #$UDPServerRun 514 # start a UDP syslog server at standard port 514 -*.* /mnt/logfile -- cgit From 27a639ed8875969574543f7738c7bcb14c6149d2 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 18 Mar 2009 17:55:15 +0100 Subject: omfile bugfixing - fixed a bug that caused action retries not to work correctly situation was only cleared by a restart - bugfix: closed dynafile was potentially never written until another dynafile name was generated - potential loss of messages --- ChangeLog | 4 ++++ action.c | 8 +++++++- doc/rsyslog_conf_global.html | 1 + tools/omfile.c | 20 ++++++++++++++++---- 4 files changed, 28 insertions(+), 5 deletions(-) diff --git a/ChangeLog b/ChangeLog index 18bb0e53..ae700656 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,9 @@ --------------------------------------------------------------------------- Version 4.1.6 [DEVEL] (rgerhards), 2009-03-?? +- fixed a bug that caused action retries not to work correctly + situation was only cleared by a restart +- bugfix: closed dynafile was potentially never written until another + dynafile name was generated - potential loss of messages --------------------------------------------------------------------------- Version 4.1.5 [DEVEL] (rgerhards), 2009-03-11 - bugfix: parser did not correctly parse fields in UDP-received messages diff --git a/action.c b/action.c index a41f976c..10607e72 100644 --- a/action.c +++ b/action.c @@ -353,7 +353,13 @@ static rsRetVal actionTryResume(action_t *pThis) ASSERT(pThis != NULL); - ttNow = getActNow(pThis); /* cache "now" */ + /* for resume handling, we must always obtain a fresh timestamp. We used + * to use the action timestamp, but in this case we will never reach a + * point where a resumption is actually tried, because the action timestamp + * is always in the past. So we can not avoid doing a fresh time() call + * here. -- rgerhards, 2009-03-18 + */ + time(&ttNow); /* cache "now" */ /* first check if it is time for a re-try */ if(ttNow > pThis->ttResumeRtry) { diff --git a/doc/rsyslog_conf_global.html b/doc/rsyslog_conf_global.html index b0c1e400..d011bd2b 100644 --- a/doc/rsyslog_conf_global.html +++ b/doc/rsyslog_conf_global.html @@ -103,6 +103,7 @@ default 60000 (1 minute)]
  • $DefaultNetstreamDriver <drivername>, the default network stream driver to use. Defaults to ptcp.$DefaultNetstreamDriverCAFile </path/to/cafile.pem>
  • $DefaultNetstreamDriverCertFile </path/to/certfile.pem>
  • $DefaultNetstreamDriverKeyFile </path/to/keyfile.pem>
  • +
  • $CreateDirs [on/off] - create directories on an as-needed basis
  • $DirCreateMode
  • $DirGroup
  • $DirOwner
  • diff --git a/tools/omfile.c b/tools/omfile.c index 8be815f2..bf84d1a7 100644 --- a/tools/omfile.c +++ b/tools/omfile.c @@ -457,6 +457,8 @@ static int prepareDynFile(instanceData *pData, uchar *newFileName, unsigned iMsg int i; int iFirstFree; dynaFileCacheEntry **pCache; + + BEGINfunc ASSERT(pData != NULL); ASSERT(newFileName != NULL); @@ -542,6 +544,8 @@ static int prepareDynFile(instanceData *pData, uchar *newFileName, unsigned iMsg pData->iCurrElt = iFirstFree; DBGPRINTF("Added new entry %d for file cache, file '%s'.\n", iFirstFree, newFileName); + ENDfunc + return 0; } @@ -553,6 +557,7 @@ static int prepareDynFile(instanceData *pData, uchar *newFileName, unsigned iMsg static rsRetVal writeFile(uchar **ppString, unsigned iMsgOpts, instanceData *pData) { off_t actualFileSize; + int iLenWritten; DEFiRet; ASSERT(pData != NULL); @@ -563,7 +568,9 @@ static rsRetVal writeFile(uchar **ppString, unsigned iMsgOpts, instanceData *pDa if(pData->bDynamicName) { if(prepareDynFile(pData, ppString[1], iMsgOpts) != 0) ABORT_FINALIZE(RS_RET_SUSPENDED); // TODO: different state? conditional based on what went wrong? 2009-03-11 - } else if(pData->fd == -1) { + } + + if(pData->fd == -1) { prepareFile(pData, pData->f_fname); } @@ -600,16 +607,21 @@ again: } } - if(write(pData->fd, ppString[0], strlen((char*)ppString[0])) < 0) { + iLenWritten = write(pData->fd, ppString[0], strlen((char*)ppString[0])); +//dbgprintf("lenwritten: %d\n", iLenWritten); + if(iLenWritten < 0) { int e = errno; -dbgprintf("++++++++++ log file writer error %d\n", e); + char errStr[1024]; + rs_strerror_r(errno, errStr, sizeof(errStr)); + DBGPRINTF("log file (%d) write error %d: %s\n", pData->fd, e, errStr); /* If a named pipe is full, just ignore it for now - mrn 24 May 96 */ - if (pData->fileType == eTypePIPE && e == EAGAIN) + if(pData->fileType == eTypePIPE && e == EAGAIN) ABORT_FINALIZE(RS_RET_SUSPENDED); (void) close(pData->fd); + pData->fd = -1; /* tell that fd is no longer open! */ /* Check for EBADF on TTY's due to vhangup() * Linux uses EIO instead (mrn 12 May 96) */ -- cgit From 508a9e0cef064b082cd9e13aecd9d6c0f1f51977 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Thu, 19 Mar 2009 12:01:43 +0100 Subject: omfile suspend handling for non-dynafiles, also bugfixes primarily bugs introduced by recent changes. We now also handle static file names correctly, that was not the case before. We now correctly reset the descriptor in the dynafile cache if somthing goes wrong. Keep in mind that reliablity of output is depending on the reliability of the file system driver (the cifs driver returns OK, but still loses data if it is disconnected for too-long). --- ChangeLog | 3 +++ tools/omfile.c | 44 ++++++++++++++++++++++++++++++++------------ 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/ChangeLog b/ChangeLog index ae700656..5183674f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,9 @@ Version 4.1.6 [DEVEL] (rgerhards), 2009-03-?? situation was only cleared by a restart - bugfix: closed dynafile was potentially never written until another dynafile name was generated - potential loss of messages +- improved omfile so that it properly suspends itself if there is an + i/o or file name generation error. This enables it to be used with + the full high availability features of rsyslog's engine --------------------------------------------------------------------------- Version 4.1.5 [DEVEL] (rgerhards), 2009-03-11 - bugfix: parser did not correctly parse fields in UDP-received messages diff --git a/tools/omfile.c b/tools/omfile.c index bf84d1a7..ff4c4618 100644 --- a/tools/omfile.c +++ b/tools/omfile.c @@ -383,9 +383,12 @@ static void dynaFileFreeCache(instanceData *pData) * file access, which, among others, means the the file wil be opened * and any directories in between will be created (based on config, of * course). -- rgerhards, 2008-10-22 + * changed to iRet interface - 2009-03-19 */ -static void prepareFile(instanceData *pData, uchar *newFileName) +static rsRetVal +prepareFile(instanceData *pData, uchar *newFileName) { + DEFiRet; if(pData->fileType == eTypePIPE) { pData->fd = open((char*) pData->f_fname, O_RDWR|O_NONBLOCK); FINALIZE; /* we are done in this case */ @@ -399,14 +402,14 @@ static void prepareFile(instanceData *pData, uchar *newFileName) pData->fd = -1; /* file does not exist, create it (and eventually parent directories */ if(pData->bCreateDirs) { - /* we fist need to create parent dirs if they are missing + /* We first need to create parent dirs if they are missing. * We do not report any errors here ourselfs but let the code * fall through to error handler below. */ if(makeFileParentDirs(newFileName, strlen((char*)newFileName), pData->fDirCreateMode, pData->dirUID, pData->dirGID, pData->bFailOnChown) != 0) { - return; /* we give up */ + ABORT_FINALIZE(RS_RET_ERR); /* we give up */ } } /* no matter if we needed to create directories or not, we now try to create @@ -418,8 +421,7 @@ static void prepareFile(instanceData *pData, uchar *newFileName) /* check and set uid/gid */ if(pData->fileUID != (uid_t)-1 || pData->fileGID != (gid_t) -1) { /* we need to set owner/group */ - if(fchown(pData->fd, pData->fileUID, - pData->fileGID) != 0) { + if(fchown(pData->fd, pData->fileUID, pData->fileGID) != 0) { if(pData->bFailOnChown) { int eSave = errno; close(pData->fd); @@ -434,11 +436,17 @@ static void prepareFile(instanceData *pData, uchar *newFileName) } } finalize_it: - if((pData->fd) != 0 && isatty(pData->fd)) { + /* this was "pData->fd != 0", which I think was a bug. I guess 0 was intended to mean + * non-open file descriptor. Anyhow, I leave this comment for the time being to that if + * problems surface, one at least knows what happened. -- rgerhards, 2009-03-19 + */ + if(pData->fd != -1 && isatty(pData->fd)) { DBGPRINTF("file %d is a tty file\n", pData->fd); pData->fileType = eTypeTTY; untty(); } + + RETiRet; } @@ -520,7 +528,7 @@ static int prepareDynFile(instanceData *pData, uchar *newFileName, unsigned iMsg } /* Ok, we finally can open the file */ - prepareFile(pData, newFileName); + prepareFile(pData, newFileName); /* ignore exact error, we check fd below */ /* file is either open now or an error state set */ if(pData->fd == -1) { @@ -567,11 +575,14 @@ static rsRetVal writeFile(uchar **ppString, unsigned iMsgOpts, instanceData *pDa */ if(pData->bDynamicName) { if(prepareDynFile(pData, ppString[1], iMsgOpts) != 0) - ABORT_FINALIZE(RS_RET_SUSPENDED); // TODO: different state? conditional based on what went wrong? 2009-03-11 + ABORT_FINALIZE(RS_RET_SUSPENDED); /* whatever the failure was, we need to retry */ } if(pData->fd == -1) { - prepareFile(pData, pData->f_fname); + rsRetVal iRetLocal; + iRetLocal = prepareFile(pData, pData->f_fname); + if((iRetLocal != RS_RET_OK) || (pData->fd == -1)) + ABORT_FINALIZE(RS_RET_SUSPENDED); /* whatever the failure was, we need to retry */ } /* create the message based on format specified */ @@ -615,13 +626,19 @@ again: rs_strerror_r(errno, errStr, sizeof(errStr)); DBGPRINTF("log file (%d) write error %d: %s\n", pData->fd, e, errStr); - /* If a named pipe is full, just ignore it for now - - mrn 24 May 96 */ + /* If a named pipe is full, we suspend this action for a while */ if(pData->fileType == eTypePIPE && e == EAGAIN) ABORT_FINALIZE(RS_RET_SUSPENDED); - (void) close(pData->fd); + close(pData->fd); pData->fd = -1; /* tell that fd is no longer open! */ + if(pData->bDynamicName && pData->iCurrElt != -1) { + /* in this case, we need to invalidate the name in the cache, too + * otherwise, an invalid fd may show up if we had a file name change. + * rgerhards, 2009-03-19 + */ + pData->dynCache[pData->iCurrElt]->fd = -1; + } /* Check for EBADF on TTY's due to vhangup() * Linux uses EIO instead (mrn 12 May 96) */ @@ -793,6 +810,9 @@ CODESTARTparseSelectorAct pData->dirUID = dirUID; pData->dirGID = dirGID; + /* at this stage, we ignore the return value of prepareFile, this is taken + * care of in later steps. -- rgerhards, 2009-03-19 + */ prepareFile(pData, pData->f_fname); if(pData->fd < 0 ) { -- cgit From de3341a64131092fabb3a1c60adb77e70bdb7fb6 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Mon, 23 Mar 2009 18:24:44 +0100 Subject: added "rsyslog family tree" graph of rsyslog versions and branches --- doc/rsyslog-vers.dot | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 doc/rsyslog-vers.dot diff --git a/doc/rsyslog-vers.dot b/doc/rsyslog-vers.dot new file mode 100644 index 00000000..a5563f94 --- /dev/null +++ b/doc/rsyslog-vers.dot @@ -0,0 +1,82 @@ +// This file is part of rsyslog. +// +// rsyslog "family tree" compressed version +// +// see http://www.graphviz.org for how to obtain the graphviz processor +// which is used to build the actual graph. +// +// generate the graph with +// $ dot rsyslog-vers.dot -Tpng >rsyslog-vers.png + +digraph G { + label="\n\nrsyslog \"family tree\"\nhttp://www.rsyslog.com"; + fontsize=20; + + v1stable [label="v1-stable", shape=box, style=dotted]; + v2stable [label="v2-stable", shape=box, style=filled]; + v3stable [label="v3-stable", shape=box, style=filled]; + beta [label="beta", shape=box, style=filled]; + devel [label="devel", shape=box, style=filled]; + "1.0.5" [style=dotted]; + perf [style=dashed]; + "imudp-select" [style=dashed label="imudp-\nselect"]; + msgnolock [style=dashed]; + "file-errHdlr" [style=dashed]; + solaris [style=dashed]; + tests [style=dashed]; + "0.x.y" [group=master]; + "1.10.0" [group=master]; + "1.21.2" [group=master]; + "3.10.0" [group=master]; + "3.15.1" [group=master]; + "3.17.0" [group=master]; + "3.19.x" [group=master]; + "3.21.x" [group=master]; + "4.1.0" [group=master]; + "4.1.4" [group=master]; + "4.1.5" [group=master]; + "4.1.6" [group=master]; + + sysklogd -> "0.x.y" [color=red]; + "0.x.y" -> "1.0.0"; + "0.x.y" -> "1.10.0" [color=red]; + "1.0.0" -> "1.0.5" [style=dashed]; + "1.10.0" -> "1.21.2" [color=red style=dashed]; + "1.21.2" -> "2.0.0" [color=blue]; + "2.0.0" -> "2.0.5" [style=dashed, color=blue]; + "1.21.2" -> "3.10.0" [color=red]; + "3.10.0" -> "3.15.1" [color=red style=dashed]; + "3.15.1" -> "tests"; + "3.15.1" -> "3.17.0" [color=red style=dashed]; + "3.15.1" -> "3.16.x"; + "3.16.x" -> "3.18.x" [color=blue, style=dashed]; + "3.17.0" -> "3.18.x"; + "3.17.0" -> "3.19.x" [color=red, style=dashed]; + "3.19.x" -> "3.20.x"; + "3.19.x" -> "3.21.x" [color=red]; + "3.18.x" -> debian_lenny; + "3.18.x" -> "3.20.x" [color=blue, style=dashed]; + "3.21.x" -> "3.21.11"; + "3.21.x" -> "4.1.0" [color=red]; + "3.21.x" -> "perf"; + "perf" -> "4.1.0"; + "4.1.0" -> "4.1.4" [color=red, style=dashed]; + "4.1.4" -> "file-errHdlr"; + "4.1.4" -> "4.1.5" [color=red]; + "4.1.5" -> "4.1.6" [color=red]; + "3.21.x" -> msgnolock + "3.21.x" -> "imudp-select"; + "4.1.5" -> solaris; + "file-errHdlr" -> "4.1.6"; + "tests" -> "4.1.6"; + + "1.0.5" -> v1stable [dir=none, style=dotted]; + "2.0.5" -> v2stable [dir=none, style=dotted]; + "3.20.x" -> v3stable [dir=none, style=dotted]; + "3.21.11" -> beta [dir=none, style=dotted]; + "4.1.6" -> devel [dir=none, style=dotted]; + + {rank=same; "4.1.5" "solaris"} + {rank=same; "3.18.x" "debian_lenny"} + {rank=same; "1.0.5" "2.0.5" "3.20.x" "3.21.11" "4.1.6"} +} -- cgit From 6ffb9010811ee9bc0c3703716443c4dd00922f6f Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 25 Mar 2009 17:20:51 +0100 Subject: bugfix: potential abort with DA queue after high watermark is reached There exists a race condition that can lead to a segfault. Thanks go to vbernetr, who performed the analysis and provided patch, which I only tweaked a very little bit. --- ChangeLog | 4 ++++ runtime/debug.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ runtime/debug.h | 4 ++++ runtime/wti.c | 10 ++++++--- runtime/wtp.c | 20 ++++++++++++++--- runtime/wtp.h | 1 + 6 files changed, 102 insertions(+), 6 deletions(-) diff --git a/ChangeLog b/ChangeLog index 880f3c73..6becbda9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +- bugfix: potential abort with DA queue after high watermark is reached + There exists a race condition that can lead to a segfault. Thanks + go to vbernetr, who performed the analysis and provided patch, which + I only tweaked a very little bit. --------------------------------------------------------------------------- Version 3.20.4 [v3-stable] (rgerhards), 2009-02-09 - bugfix: inconsistent use of mutex/atomic operations could cause segfault diff --git a/runtime/debug.c b/runtime/debug.c index 1450d029..9d45c737 100644 --- a/runtime/debug.c +++ b/runtime/debug.c @@ -473,6 +473,55 @@ static inline void dbgMutexLockLog(pthread_mutex_t *pmut, dbgFuncDB_t *pFuncDB, dbgprintf("%s:%d:%s: mutex %p aquired\n", pFuncDB->file, lockLn, pFuncDB->func, (void*)pmut); } + +/* report trylock on a mutex and add it to the mutex log */ +static inline void dbgMutexPreTryLockLog(pthread_mutex_t *pmut, dbgFuncDB_t *pFuncDB, int ln) +{ + dbgMutLog_t *pHolder; + dbgMutLog_t *pLog; + char pszBuf[128]; + char pszHolderThrdName[64]; + char *pszHolder; + + pthread_mutex_lock(&mutMutLog); + pHolder = dbgMutLogFindHolder(pmut); + pLog = dbgMutLogAddEntry(pmut, MUTOP_TRYLOCK, pFuncDB, ln); + + if(pHolder == NULL) + pszHolder = "[NONE]"; + else { + dbgGetThrdName(pszHolderThrdName, sizeof(pszHolderThrdName), pHolder->thrd, 1); + snprintf(pszBuf, sizeof(pszBuf)/sizeof(char), "%s:%d [%s]", pHolder->pFuncDB->file, pHolder->lockLn, pszHolderThrdName); + pszHolder = pszBuf; + } + + if(bPrintMutexAction) + dbgprintf("%s:%d:%s: mutex %p trying to get lock, held by %s\n", pFuncDB->file, ln, pFuncDB->func, (void*)pmut, pszHolder); + pthread_mutex_unlock(&mutMutLog); +} + + +/* report attempted mutex lock */ +static inline void dbgMutexTryLockLog(pthread_mutex_t *pmut, dbgFuncDB_t *pFuncDB, int lockLn) +{ + dbgMutLog_t *pLog; + + pthread_mutex_lock(&mutMutLog); + + /* find and delete "trylock" entry */ + pLog = dbgMutLogFindSpecific(pmut, MUTOP_TRYLOCK, pFuncDB); + assert(pLog != NULL); + dbgMutLogDelEntry(pLog); + + /* add "lock" entry */ + pLog = dbgMutLogAddEntry(pmut, MUTOP_LOCK, pFuncDB, lockLn); + dbgFuncDBAddMutexLock(pFuncDB, pmut, lockLn); + pthread_mutex_unlock(&mutMutLog); + if(bPrintMutexAction) + dbgprintf("%s:%d:%s: mutex %p aquired\n", pFuncDB->file, lockLn, pFuncDB->func, (void*)pmut); +} + + /* if we unlock, we just remove the lock aquired entry from the log list */ static inline void dbgMutexUnlockLog(pthread_mutex_t *pmut, dbgFuncDB_t *pFuncDB, int unlockLn) { @@ -515,6 +564,26 @@ int dbgMutexLock(pthread_mutex_t *pmut, dbgFuncDB_t *pFuncDB, int ln, int iStack } +/* wrapper for pthread_mutex_trylock() */ +int dbgMutexTryLock(pthread_mutex_t *pmut, dbgFuncDB_t *pFuncDB, int ln, int iStackPtr) +{ + int ret; + + dbgRecordExecLocation(iStackPtr, ln); + dbgMutexPreLockLog(pmut, pFuncDB, ln); // TODO : update this + ret = pthread_mutex_trylock(pmut); + if(ret == 0 || ret == EBUSY) { + // TODO : update this + dbgMutexLockLog(pmut, pFuncDB, ln); + } else { + dbgprintf("%s:%d:%s: ERROR: pthread_mutex_trylock() for mutex %p failed with error %d\n", + pFuncDB->file, ln, pFuncDB->func, (void*)pmut, ret); + } + + return ret; +} + + /* wrapper for pthread_mutex_unlock() */ int dbgMutexUnlock(pthread_mutex_t *pmut, dbgFuncDB_t *pFuncDB, int ln, int iStackPtr) { diff --git a/runtime/debug.h b/runtime/debug.h index 214b7c05..ed914677 100644 --- a/runtime/debug.h +++ b/runtime/debug.h @@ -89,6 +89,7 @@ void sigsegvHdlr(int signum); void dbgoprint(obj_t *pObj, char *fmt, ...) __attribute__((format(printf, 2, 3))); void dbgprintf(char *fmt, ...) __attribute__((format(printf, 1, 2))); int dbgMutexLock(pthread_mutex_t *pmut, dbgFuncDB_t *pFuncD, int ln, int iStackPtr); +int dbgMutexTryLock(pthread_mutex_t *pmut, dbgFuncDB_t *pFuncD, int ln, int iStackPtr); int dbgMutexUnlock(pthread_mutex_t *pmut, dbgFuncDB_t *pFuncD, int ln, int iStackPtr); int dbgCondWait(pthread_cond_t *cond, pthread_mutex_t *pmut, dbgFuncDB_t *pFuncD, int ln, int iStackPtr); int dbgCondTimedWait(pthread_cond_t *cond, pthread_mutex_t *pmut, const struct timespec *abstime, dbgFuncDB_t *pFuncD, int ln, int iStackPtr); @@ -127,17 +128,20 @@ void dbgPrintAllDebugInfo(void); #define MUTOP_LOCKWAIT 1 #define MUTOP_LOCK 2 #define MUTOP_UNLOCK 3 +#define MUTOP_TRYLOCK 4 /* debug aides */ #ifdef RTINST #define d_pthread_mutex_lock(x) dbgMutexLock(x, pdbgFuncDB, __LINE__, dbgCALLStaCK_POP_POINT ) +#define d_pthread_mutex_trylock(x) dbgMutexTryLock(x, pdbgFuncDB, __LINE__, dbgCALLStaCK_POP_POINT ) #define d_pthread_mutex_unlock(x) dbgMutexUnlock(x, pdbgFuncDB, __LINE__, dbgCALLStaCK_POP_POINT ) #define d_pthread_cond_wait(cond, mut) dbgCondWait(cond, mut, pdbgFuncDB, __LINE__, dbgCALLStaCK_POP_POINT ) #define d_pthread_cond_timedwait(cond, mut, to) dbgCondTimedWait(cond, mut, to, pdbgFuncDB, __LINE__, dbgCALLStaCK_POP_POINT ) #define d_free(x) dbgFree(x, pdbgFuncDB, __LINE__, dbgCALLStaCK_POP_POINT ) #else #define d_pthread_mutex_lock(x) pthread_mutex_lock(x) +#define d_pthread_mutex_trylock(x) pthread_mutex_trylock(x) #define d_pthread_mutex_unlock(x) pthread_mutex_unlock(x) #define d_pthread_cond_wait(cond, mut) pthread_cond_wait(cond, mut) #define d_pthread_cond_timedwait(cond, mut, to) pthread_cond_timedwait(cond, mut, to) diff --git a/runtime/wti.c b/runtime/wti.c index 13554232..a2531499 100644 --- a/runtime/wti.c +++ b/runtime/wti.c @@ -238,10 +238,14 @@ wtiJoinThrd(wti_t *pThis) ISOBJ_TYPE_assert(pThis, wti); dbgprintf("waiting for worker %s termination, current state %d\n", wtiGetDbgHdr(pThis), pThis->tCurrCmd); - pthread_join(pThis->thrdID, NULL); - wtiSetState(pThis, eWRKTHRD_STOPPED, 0, MUTEX_ALREADY_LOCKED); /* back to virgin... */ - pThis->thrdID = 0; /* invalidate the thread ID so that we do not accidently find reused ones */ + if (pThis->thrdID == 0) { + dbgprintf("worker %s was already stopped\n", wtiGetDbgHdr(pThis)); + } else { + pthread_join(pThis->thrdID, NULL); + wtiSetState(pThis, eWRKTHRD_STOPPED, 0, MUTEX_ALREADY_LOCKED); /* back to virgin... */ + pThis->thrdID = 0; /* invalidate the thread ID so that we do not accidently find reused ones */ dbgprintf("worker %s has stopped\n", wtiGetDbgHdr(pThis)); + } RETiRet; } diff --git a/runtime/wtp.c b/runtime/wtp.c index 8b041ea2..fcefa1d8 100644 --- a/runtime/wtp.c +++ b/runtime/wtp.c @@ -76,6 +76,7 @@ static rsRetVal NotImplementedDummy() { return RS_RET_OK; } */ BEGINobjConstruct(wtp) /* be sure to specify the object type also in END macro! */ pthread_mutex_init(&pThis->mut, NULL); + pthread_mutex_init(&pThis->mutThrdShutdwn, NULL); pthread_cond_init(&pThis->condThrdTrm, NULL); /* set all function pointers to "not implemented" dummy so that we can safely call them */ pThis->pfChkStopWrkr = NotImplementedDummy; @@ -140,6 +141,7 @@ CODESTARTobjDestruct(wtp) /* actual destruction */ pthread_cond_destroy(&pThis->condThrdTrm); pthread_mutex_destroy(&pThis->mut); + pthread_mutex_destroy(&pThis->mutThrdShutdwn); if(pThis->pszDbgHdr != NULL) free(pThis->pszDbgHdr); @@ -189,11 +191,23 @@ wtpProcessThrdChanges(wtp_t *pThis) if(pThis->bThrdStateChanged == 0) FINALIZE; - /* go through all threads */ - for(i = 0 ; i < pThis->iNumWorkerThreads ; ++i) { - wtiProcessThrdChanges(pThis->pWrkr[i], LOCK_MUTEX); + if(d_pthread_mutex_trylock(&(pThis->mutThrdShutdwn)) != 0) { + /* another thread is already in the loop */ + FINALIZE; } + do { + /* reset the change marker */ + pThis->bThrdStateChanged = 0; + /* go through all threads */ + for(i = 0 ; i < pThis->iNumWorkerThreads ; ++i) { + wtiProcessThrdChanges(pThis->pWrkr[i], LOCK_MUTEX); + } + /* restart if another change occured while we were processing the changes */ + } while(pThis->bThrdStateChanged != 0); + + d_pthread_mutex_unlock(&(pThis->mutThrdShutdwn)); + finalize_it: RETiRet; } diff --git a/runtime/wtp.h b/runtime/wtp.h index 13ebe536..0f21ac11 100644 --- a/runtime/wtp.h +++ b/runtime/wtp.h @@ -60,6 +60,7 @@ typedef struct wtp_s { int bInactivityGuard;/* prevents inactivity due to race condition */ rsRetVal (*pConsumer)(void *); /* user-supplied consumer function for dewtpd messages */ /* synchronization variables */ + pthread_mutex_t mutThrdShutdwn; /* mutex to guard thread shutdown processing */ pthread_mutex_t mut; /* mutex for the wtp's thread management */ pthread_cond_t condThrdTrm;/* signalled when threads terminate */ int bThrdStateChanged; /* at least one thread state has changed if 1 */ -- cgit