Diffstat (limited to 'runtime')
-rw-r--r-- | runtime/Makefile.am | 1
-rw-r--r-- | runtime/atomic.h | 2
-rw-r--r-- | runtime/batch.h | 72
-rw-r--r-- | runtime/conf.c | 2
-rw-r--r-- | runtime/debug.c | 65
-rw-r--r-- | runtime/modules.c | 50
-rw-r--r-- | runtime/modules.h | 2
-rw-r--r-- | runtime/msg.c | 34
-rw-r--r-- | runtime/msg.h | 2
-rw-r--r-- | runtime/queue.c | 1555
-rw-r--r-- | runtime/queue.h | 48
-rw-r--r-- | runtime/rsyslog.h | 35
-rw-r--r-- | runtime/srUtils.h | 2
-rw-r--r-- | runtime/stream.c | 43
-rw-r--r-- | runtime/stream.h | 3
-rw-r--r-- | runtime/wti.c | 131
-rw-r--r-- | runtime/wti.h | 10
-rw-r--r-- | runtime/wtp.c | 41
-rw-r--r-- | runtime/wtp.h | 14
19 files changed, 1333 insertions, 779 deletions
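The patch below introduces batch-based dequeueing: queue consumers no longer receive a single user pointer but a batch_t holding several batch_obj_t entries, each carrying its own BATCH_STATE_* value (see the new runtime/batch.h and qAddDirect() in runtime/queue.c), and output modules may optionally expose beginTransaction/endTransaction entry points that fall back to dummies. The following sketch only illustrates the per-element state handling a batch consumer is expected to perform; it uses simplified local mirrors of the batch structures, and processOneMsg() plus the char* payload are illustrative stand-ins, not rsyslog API.

/* Illustrative only: trimmed local mirrors of the structures added in
 * runtime/batch.h by this commit. Field names follow the patch; in rsyslog
 * pUsrp is an obj_t* (usually a msg object), not a string.
 */
#include <stdio.h>

typedef enum {
	BATCH_STATE_RDY  = 0,	/* ready for processing */
	BATCH_STATE_BAD  = 1,	/* unrecoverable failure, do not resubmit */
	BATCH_STATE_SUB  = 2,	/* submitted, outcome not yet known */
	BATCH_STATE_COMM = 3,	/* successfully committed */
	BATCH_STATE_DISC = 4	/* discarded, do not pass to further actions */
} batch_state_t;

typedef struct { const char *pUsrp; batch_state_t state; } batch_obj_t;
typedef struct { int nElem; batch_obj_t *pElem; } batch_t;

/* hypothetical per-message action; returns 0 on success */
static int processOneMsg(const char *msg) { return printf("action: %s\n", msg) < 0; }

/* sketch of a batch consumer: walk the batch and record per-element outcome */
static void consumeBatch(batch_t *pBatch)
{
	int i;
	for (i = 0; i < pBatch->nElem; ++i) {
		if (pBatch->pElem[i].state != BATCH_STATE_RDY)
			continue;			/* already handled or discarded */
		pBatch->pElem[i].state = BATCH_STATE_SUB;	/* handed to the action; in the real
							 * engine SUB persists until commit */
		pBatch->pElem[i].state = processOneMsg(pBatch->pElem[i].pUsrp)
					     ? BATCH_STATE_BAD
					     : BATCH_STATE_COMM;
	}
}

int main(void)
{
	batch_obj_t objs[2] = { { "msg 1", BATCH_STATE_RDY }, { "msg 2", BATCH_STATE_RDY } };
	batch_t batch = { 2, objs };	/* direct-mode queues build such a single/small batch, cf. qAddDirect() */
	consumeBatch(&batch);		/* both elements end up in BATCH_STATE_COMM */
	return 0;
}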
diff --git a/runtime/Makefile.am b/runtime/Makefile.am index 14abe722..caf7c5ca 100644 --- a/runtime/Makefile.am +++ b/runtime/Makefile.am @@ -9,6 +9,7 @@ librsyslog_la_SOURCES = \ rsyslog.h \ unicode-helper.h \ atomic.h \ + batch.h \ syslogd-types.h \ module-template.h \ obj-types.h \ diff --git a/runtime/atomic.h b/runtime/atomic.h index d5aaf56b..f0733f09 100644 --- a/runtime/atomic.h +++ b/runtime/atomic.h @@ -41,6 +41,8 @@ * They simply came in too late. -- rgerhards, 2008-04-02 */ #ifdef HAVE_ATOMIC_BUILTINS +# define ATOMIC_SUB(data, val) __sync_fetch_and_sub(&(data), val) +# define ATOMIC_ADD(data, val) __sync_fetch_and_add(&(data), val) # define ATOMIC_INC(data) ((void) __sync_fetch_and_add(&(data), 1)) # define ATOMIC_INC_AND_FETCH(data) __sync_fetch_and_add(&(data), 1) # define ATOMIC_DEC(data) ((void) __sync_sub_and_fetch(&(data), 1)) diff --git a/runtime/batch.h b/runtime/batch.h new file mode 100644 index 00000000..031718a7 --- /dev/null +++ b/runtime/batch.h @@ -0,0 +1,72 @@ +/* Definition of the batch_t data structure. + * I am not sure yet if this will become a full-blown object. For now, this header just + * includes the object definition and is not accompanied by code. + * + * Copyright 2009 by Rainer Gerhards and Adiscon GmbH. + * + * This file is part of the rsyslog runtime library. + * + * The rsyslog runtime library is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * The rsyslog runtime library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with the rsyslog runtime library. If not, see <http://www.gnu.org/licenses/>. + * + * A copy of the GPL can be found in the file "COPYING" in this distribution. + * A copy of the LGPL can be found in the file "COPYING.LESSER" in this distribution. + */ + +#ifndef BATCH_H_INCLUDED +#define BATCH_H_INCLUDED + +/* enum for batch states. Actually, we violate a layer here, in that we assume that a batch is used + * for action processing. So far, this seems acceptable, the status is simply ignored inside the + * main message queue. But over time, it could potentially be useful to split the two. + * rgerhad, 2009-05-12 + */ +typedef enum { + BATCH_STATE_RDY = 0, /* object ready for processing */ + BATCH_STATE_BAD = 1, /* unrecoverable failure while processing, do NOT resubmit to same action */ + BATCH_STATE_SUB = 2, /* message submitted for processing, outcome yet unkonwn */ + BATCH_STATE_COMM = 3, /* message successfully commited */ + BATCH_STATE_DISC = 4, /* discarded - processed OK, but do not submit to any other action */ +} batch_state_t; + + +/* an object inside a batch, including any information (state!) needed for it to "life". + */ +struct batch_obj_s { + obj_t *pUsrp; /* pointer to user object (most often message) */ + batch_state_t state; /* associated state */ +}; + +/* the batch + * This object is used to dequeue multiple user pointers which are than handed over + * to processing. The size of elements is fixed after queue creation, but may be + * modified by config variables (better said: queue properties). 
+ * Note that a "user pointer" in rsyslog context so far always is a message + * object. We stick to the more generic term because queues may potentially hold + * other types of objects, too. + * rgerhards, 2009-05-12 + * Note that nElem is not necessarily equal to nElemDeq. This is the case when we + * discard some elements (because of configuration) during dequeue processing. As + * all Elements are only deleted when the batch is processed, we can not immediately + * delete them. So we need to keep their number that we can delete them when the batch + * is completed (else, the whole process does not work correctly). + */ +struct batch_s { + int nElem; /* actual number of element in this entry */ + int nElemDeq; /* actual number of elements dequeued (and thus to be deleted) - see comment above! */ + int iDoneUpTo; /* all messages below this index have state other than RDY */ + qDeqID deqID; /* ID of dequeue operation that generated this batch */ + batch_obj_t *pElem; /* batch elements */ +}; + +#endif /* #ifndef BATCH_H_INCLUDED */ diff --git a/runtime/conf.c b/runtime/conf.c index 83ed2e9b..84178fd5 100644 --- a/runtime/conf.c +++ b/runtime/conf.c @@ -1102,7 +1102,7 @@ static rsRetVal cflineDoAction(uchar **p, action_t **ppAction) dbgprintf("module is incompatible with RepeatedMsgReduction - turned off\n"); pAction->f_ReduceRepeated = 0; } - pAction->bEnabled = 1; /* action is enabled */ + pAction->eState = ACT_STATE_RDY; /* action is enabled */ iNbrActions++; /* one more active action! */ } break; diff --git a/runtime/debug.c b/runtime/debug.c index 807fd3f7..fb751efb 100644 --- a/runtime/debug.c +++ b/runtime/debug.c @@ -830,13 +830,12 @@ sigsegvHdlr(int signum) abort(); } -#if 1 -#pragma GCC diagnostic ignored "-Wempty-body" -/* write the debug message. This is a helper to dbgprintf and dbgoprint which - * contains common code. added 2008-09-26 rgerhards +/* actually write the debug message. This is a separate fuction because the cleanup_push/_pop + * interface otherwise is unsafe to use (generates compiler warnings at least). + * 2009-05-20 rgerhards */ -static void -dbgprint(obj_t *pObj, char *pszMsg, size_t lenMsg) +static inline void +do_dbgprint(uchar *pszObjName, char *pszMsg, size_t lenMsg) { static pthread_t ptLastThrdID = 0; static int bWasNL = 0; @@ -844,20 +843,6 @@ dbgprint(obj_t *pObj, char *pszMsg, size_t lenMsg) char pszWriteBuf[1024]; size_t lenWriteBuf; struct timespec t; - uchar *pszObjName = NULL; - - /* we must get the object name before we lock the mutex, because the object - * potentially calls back into us. If we locked the mutex, we would deadlock - * ourselfs. On the other hand, the GetName call needs not to be protected, as - * this thread has a valid reference. If such an object is deleted by another - * thread, we are in much more trouble than just for dbgprint(). -- rgerhards, 2008-09-26 - */ - if(pObj != NULL) { - pszObjName = obj.GetName(pObj); - } - - pthread_mutex_lock(&mutdbgprint); - pthread_cleanup_push(dbgMutexCancelCleanupHdlr, &mutdbgprint); /* The bWasNL handler does not really work. It works if no thread * switching occurs during non-NL messages. Else, things are messed @@ -905,11 +890,35 @@ dbgprint(obj_t *pObj, char *pszMsg, size_t lenMsg) if(altdbg != -1) write(altdbg, pszMsg, lenMsg); bWasNL = (pszMsg[lenMsg - 1] == '\n') ? 1 : 0; +} + +#pragma GCC diagnostic ignored "-Wempty-body" +/* write the debug message. This is a helper to dbgprintf and dbgoprint which + * contains common code. 
added 2008-09-26 rgerhards + */ +static void +dbgprint(obj_t *pObj, char *pszMsg, size_t lenMsg) +{ + uchar *pszObjName = NULL; + + /* we must get the object name before we lock the mutex, because the object + * potentially calls back into us. If we locked the mutex, we would deadlock + * ourselfs. On the other hand, the GetName call needs not to be protected, as + * this thread has a valid reference. If such an object is deleted by another + * thread, we are in much more trouble than just for dbgprint(). -- rgerhards, 2008-09-26 + */ + if(pObj != NULL) { + pszObjName = obj.GetName(pObj); + } + + pthread_mutex_lock(&mutdbgprint); + pthread_cleanup_push(dbgMutexCancelCleanupHdlr, &mutdbgprint); + + do_dbgprint(pszObjName, pszMsg, lenMsg); pthread_cleanup_pop(1); } #pragma GCC diagnostic warning "-Wempty-body" -#endif /* print some debug output when an object is given * This is mostly a copy of dbgprintf, but I do not know how to combine it @@ -1052,7 +1061,9 @@ int dbgEntrFunc(dbgFuncDB_t **ppFuncDB, const char *file, const char *func, int /* when we reach this point, we have a fully-initialized FuncDB! */ ATOMIC_INC(pFuncDB->nTimesCalled); if(bLogFuncFlow && dbgPrintNameIsInList((const uchar*)pFuncDB->file, printNameFileRoot)) - dbgprintf("%s:%d: %s: enter\n", pFuncDB->file, pFuncDB->line, pFuncDB->func); + if(strcmp(pFuncDB->file, "stringbuf.c")) { /* TODO: make configurable */ + dbgprintf("%s:%d: %s: enter\n", pFuncDB->file, pFuncDB->line, pFuncDB->func); + } if(pThrd->stackPtr >= (int) (sizeof(pThrd->callStack) / sizeof(dbgFuncDB_t*))) { dbgprintf("%s:%d: %s: debug module: call stack for this thread full, suspending call tracking\n", pFuncDB->file, pFuncDB->line, pFuncDB->func); @@ -1082,10 +1093,12 @@ void dbgExitFunc(dbgFuncDB_t *pFuncDB, int iStackPtrRestore, int iRet) dbgFuncDBPrintActiveMutexes(pFuncDB, "WARNING: mutex still owned by us as we exit function, mutex: ", pthread_self()); if(bLogFuncFlow && dbgPrintNameIsInList((const uchar*)pFuncDB->file, printNameFileRoot)) { - if(iRet == RS_RET_NO_IRET) - dbgprintf("%s:%d: %s: exit: (no iRet)\n", pFuncDB->file, pFuncDB->line, pFuncDB->func); - else - dbgprintf("%s:%d: %s: exit: %d\n", pFuncDB->file, pFuncDB->line, pFuncDB->func, iRet); + if(strcmp(pFuncDB->file, "stringbuf.c")) { /* TODO: make configurable */ + if(iRet == RS_RET_NO_IRET) + dbgprintf("%s:%d: %s: exit: (no iRet)\n", pFuncDB->file, pFuncDB->line, pFuncDB->func); + else + dbgprintf("%s:%d: %s: exit: %d\n", pFuncDB->file, pFuncDB->line, pFuncDB->func, iRet); + } } pThrd->stackPtr = iStackPtrRestore; if(pThrd->stackPtr < 0) { diff --git a/runtime/modules.c b/runtime/modules.c index bd201252..0ec6b1ce 100644 --- a/runtime/modules.c +++ b/runtime/modules.c @@ -77,6 +77,21 @@ static modInfo_t *pLoadedModulesLast = NULL; /* tail-pointer */ uchar *pModDir = NULL; /* read-only after startup */ +/* we provide a set of dummy functions for output modules that do not support the + * transactional interface. As they do not do this, they commit each message they + * receive, and as such the dummies can always return RS_RET_OK without causing + * harm. This simplifies things as in action processing we do not need to check + * if the transactional entry points exist. + */ +static rsRetVal dummyBeginTransaction() +{ + return RS_RET_OK; +} +static rsRetVal dummyEndTransaction() +{ + return RS_RET_OK; +} + #ifdef DEBUG /* we add some home-grown support to track our users (and detect who does not free us). In * the long term, this should probably be migrated into debug.c (TODO). 
-- rgerhards, 2008-03-11 @@ -216,19 +231,38 @@ static void moduleDestruct(modInfo_t *pThis) } +/* This enables a module to query the core for specific features. + * rgerhards, 2009-04-22 + */ +static rsRetVal queryCoreFeatureSupport(int *pBool, unsigned uFeat) +{ + DEFiRet; + + if((pBool == NULL)) + ABORT_FINALIZE(RS_RET_PARAM_ERROR); + + *pBool = (uFeat & CORE_FEATURE_BATCHING) ? 1 : 0; + +finalize_it: + RETiRet; +} + + /* The following function is the queryEntryPoint for host-based entry points. * Modules may call it to get access to core interface functions. Please note * that utility functions can be accessed via shared libraries - at least this * is my current shool of thinking. * Please note that the implementation as a query interface allows to take * care of plug-in interface version differences. -- rgerhards, 2007-07-31 + * ... but often it better not to use a new interface. So we now add core + * functions here that a plugin may request. -- rgerhards, 2009-04-22 */ static rsRetVal queryHostEtryPt(uchar *name, rsRetVal (**pEtryPoint)()) { DEFiRet; if((name == NULL) || (pEtryPoint == NULL)) - return RS_RET_PARAM_ERROR; + ABORT_FINALIZE(RS_RET_PARAM_ERROR); if(!strcmp((char*) name, "regCfSysLineHdlr")) { *pEtryPoint = regCfSysLineHdlr; @@ -236,6 +270,8 @@ static rsRetVal queryHostEtryPt(uchar *name, rsRetVal (**pEtryPoint)()) *pEtryPoint = objGetObjInterface; } else if(!strcmp((char*) name, "OMSRgetSupportedTplOpts")) { *pEtryPoint = OMSRgetSupportedTplOpts; + } else if(!strcmp((char*) name, "queryCoreFeatureSupport")) { + *pEtryPoint = queryCoreFeatureSupport; } else { *pEtryPoint = NULL; /* to be on the safe side */ ABORT_FINALIZE(RS_RET_ENTRY_POINT_NOT_FOUND); @@ -412,6 +448,18 @@ doModInit(rsRetVal (*modInit)(int, int*, rsRetVal(**)(), rsRetVal(*)(), modInfo_ localRet = (*pNew->modQueryEtryPt)((uchar*)"doHUP", &pNew->doHUP); if(localRet != RS_RET_OK && localRet != RS_RET_MODULE_ENTRY_POINT_NOT_FOUND) ABORT_FINALIZE(localRet); + + localRet = (*pNew->modQueryEtryPt)((uchar*)"beginTransaction", &pNew->mod.om.beginTransaction); + if(localRet == RS_RET_MODULE_ENTRY_POINT_NOT_FOUND) + pNew->mod.om.beginTransaction = dummyBeginTransaction; + else if(localRet != RS_RET_OK) + ABORT_FINALIZE(localRet); + + localRet = (*pNew->modQueryEtryPt)((uchar*)"endTransaction", &pNew->mod.om.endTransaction); + if(localRet == RS_RET_MODULE_ENTRY_POINT_NOT_FOUND) + pNew->mod.om.beginTransaction = dummyEndTransaction; + else if(localRet != RS_RET_OK) + ABORT_FINALIZE(localRet); break; case eMOD_LIB: break; diff --git a/runtime/modules.h b/runtime/modules.h index 4d874019..71e3199c 100644 --- a/runtime/modules.h +++ b/runtime/modules.h @@ -111,7 +111,9 @@ typedef struct modInfo_s { struct {/* data for output modules */ /* below: perform the configured action */ + rsRetVal (*beginTransaction)(void*); rsRetVal (*doAction)(uchar**, unsigned, void*); + rsRetVal (*endTransaction)(void*); rsRetVal (*parseSelectorAct)(uchar**, void**,omodStringRequest_t**); } om; struct { /* data for library modules */ diff --git a/runtime/msg.c b/runtime/msg.c index de298871..8a72a6d6 100644 --- a/runtime/msg.c +++ b/runtime/msg.c @@ -1130,15 +1130,21 @@ char *getProtocolVersionString(msg_t *pM) } -static char *getRawMsg(msg_t *pM) +static inline void +getRawMsg(msg_t *pM, uchar **pBuf, int *piLen) { - if(pM == NULL) - return ""; - else - if(pM->pszRawMsg == NULL) - return ""; - else - return (char*)pM->pszRawMsg; + if(pM == NULL) { + *pBuf= UCHAR_CONSTANT(""); + *piLen = 0; + } else { + if(pM->pszRawMsg == NULL) { + *pBuf= 
UCHAR_CONSTANT(""); + *piLen = 0; + } else { + *pBuf = pM->pszRawMsg; + *piLen = pM->iLenRawMsg; + } + } } @@ -1756,10 +1762,10 @@ int getProgramNameLen(msg_t *pM, bool bLockMutex) /* get the "programname" as sz string * rgerhards, 2005-10-19 */ -char *getProgramName(msg_t *pM, bool bLockMutex) +uchar *getProgramName(msg_t *pM, bool bLockMutex) { prepareProgramName(pM, bLockMutex); - return (pM->pCSProgName == NULL) ? "" : (char*) rsCStrGetSzStrNoNULL(pM->pCSProgName); + return (pM->pCSProgName == NULL) ? UCHAR_CONSTANT("") : rsCStrGetSzStrNoNULL(pM->pCSProgName); } @@ -1776,7 +1782,7 @@ static void tryEmulateAPPNAME(msg_t *pM) if(getProtocolVersion(pM) == 0) { /* only then it makes sense to emulate */ - MsgSetAPPNAME(pM, getProgramName(pM, MUTEX_ALREADY_LOCKED)); + MsgSetAPPNAME(pM, (char*)getProgramName(pM, MUTEX_ALREADY_LOCKED)); } } @@ -2165,12 +2171,13 @@ uchar *MsgGetProp(msg_t *pMsg, struct templateEntry *pTpe, break; case PROP_HOSTNAME: pRes = (uchar*)getHOSTNAME(pMsg); + bufLen = getHOSTNAMELen(pMsg); break; case PROP_SYSLOGTAG: getTAG(pMsg, &pRes, &bufLen); break; case PROP_RAWMSG: - pRes = (uchar*)getRawMsg(pMsg); + getRawMsg(pMsg, &pRes, &bufLen); break; /* enable this, if someone actually uses UxTradMsg, delete after some time has * passed and nobody complained -- rgerhards, 2009-06-16 @@ -2202,6 +2209,7 @@ uchar *MsgGetProp(msg_t *pMsg, struct templateEntry *pTpe, break; case PROP_IUT: pRes = UCHAR_CONSTANT("1"); /* always 1 for syslog messages (a MonitorWare thing;)) */ + bufLen = 1; break; case PROP_SYSLOGFACILITY: pRes = (uchar*)getFacility(pMsg); @@ -2219,7 +2227,7 @@ uchar *MsgGetProp(msg_t *pMsg, struct templateEntry *pTpe, pRes = (uchar*)getTimeGenerated(pMsg, pTpe->data.field.eDateFormat); break; case PROP_PROGRAMNAME: - pRes = (uchar*)getProgramName(pMsg, LOCK_MUTEX); + pRes = getProgramName(pMsg, LOCK_MUTEX); break; case PROP_PROTOCOL_VERSION: pRes = (uchar*)getProtocolVersionString(pMsg); diff --git a/runtime/msg.h b/runtime/msg.h index 0b346f7b..98b3599a 100644 --- a/runtime/msg.h +++ b/runtime/msg.h @@ -176,7 +176,7 @@ int getMSGLen(msg_t *pM); char *getHOSTNAME(msg_t *pM); int getHOSTNAMELen(msg_t *pM); -char *getProgramName(msg_t *pM, bool bLockMutex); +uchar *getProgramName(msg_t *pM, bool bLockMutex); int getProgramNameLen(msg_t *pM, bool bLockMutex); uchar *getRcvFrom(msg_t *pM); rsRetVal propNameToID(cstr_t *pCSPropName, propid_t *pPropID); diff --git a/runtime/queue.c b/runtime/queue.c index ddff1bcf..e2641c5b 100644 --- a/runtime/queue.c +++ b/runtime/queue.c @@ -8,7 +8,11 @@ * (and in the web doc set on http://www.rsyslog.com/doc). Be sure to read it * if you are getting aquainted to the object. * - * Copyright 2008 Rainer Gerhards and Adiscon GmbH. + * NOTE: as of 2009-04-22, I have begin to remove the qqueue* prefix from static + * function names - this makes it really hard to read and does not provide much + * benefit, at least I (now) think so... + * + * Copyright 2008, 2009 Rainer Gerhards and Adiscon GmbH. * * This file is part of the rsyslog runtime library. 
* @@ -51,6 +55,7 @@ #include "wti.h" #include "msg.h" #include "atomic.h" +#include "msg.h" /* TODO: remove once we remove MsgAddRef() call */ #ifdef OS_SOLARIS # include <sched.h> @@ -63,59 +68,158 @@ DEFobjCurrIf(glbl) DEFobjCurrIf(strm) /* forward-definitions */ -static rsRetVal qqueueChkPersist(qqueue_t *pThis); -static rsRetVal qqueueSetEnqOnly(qqueue_t *pThis, int bEnqOnly, int bLockMutex); -static rsRetVal qqueueRateLimiter(qqueue_t *pThis); +static rsRetVal ChkStrtDA(qqueue_t *pThis); +static rsRetVal qqueueChkPersist(qqueue_t *pThis, int nUpdates); +static rsRetVal SetEnqOnly(qqueue_t *pThis, int bEnqOnly, int bLockMutex); +static rsRetVal RateLimiter(qqueue_t *pThis); static int qqueueChkStopWrkrDA(qqueue_t *pThis); +static rsRetVal GetDeqBatchSize(qqueue_t *pThis, int *pVal); static int qqueueIsIdleDA(qqueue_t *pThis); -static rsRetVal qqueueConsumerDA(qqueue_t *pThis, wti_t *pWti, int iCancelStateSave); -static rsRetVal qqueueConsumerCancelCleanup(void *arg1, void *arg2); -static rsRetVal qqueueUngetObj(qqueue_t *pThis, obj_t *pUsr, int bLockMutex); +static rsRetVal ConsumerDA(qqueue_t *pThis, wti_t *pWti, int iCancelStateSave); +static rsRetVal batchProcessed(qqueue_t *pThis, wti_t *pWti); /* some constants for queuePersist () */ #define QUEUE_CHECKPOINT 1 #define QUEUE_NO_CHECKPOINT 0 +/*********************************************************************** + * we need a private data structure, the "to-delete" list. As C does + * not provide any partly private data structures, we implement this + * structure right here inside the module. + * Note that this list must always be kept sorted based on a unique + * dequeue ID (which is monotonically increasing). + * rgerhards, 2009-05-18 + ***********************************************************************/ + +/* generate next uniqueue dequeue ID. Note that uniqueness is only required + * on a per-queue basis and while this instance runs. So a stricly monotonically + * increasing counter is sufficient (if enough bits are used). + */ +static inline qDeqID getNextDeqID(qqueue_t *pQueue) +{ + ISOBJ_TYPE_assert(pQueue, qqueue); + return pQueue->deqIDAdd++; +} + + +/* return the top element of the to-delete list or NULL, if the + * list is empty. + */ +static inline toDeleteLst_t *tdlPeek(qqueue_t *pQueue) +{ + ISOBJ_TYPE_assert(pQueue, qqueue); + return pQueue->toDeleteLst; +} + + +/* remove the top element of the to-delete list. Nothing but the + * element itself is destroyed. Must not be called when the list + * is empty. + */ +static inline rsRetVal tdlPop(qqueue_t *pQueue) +{ + toDeleteLst_t *pRemove; + DEFiRet; + + ISOBJ_TYPE_assert(pQueue, qqueue); + assert(pQueue->toDeleteLst != NULL); + + pRemove = pQueue->toDeleteLst; + pQueue->toDeleteLst = pQueue->toDeleteLst->pNext; + free(pRemove); + + RETiRet; +} + + +/* Add a new to-delete list entry. The function allocates the data + * structure, populates it with the values provided and links the new + * element into the correct place inside the list. 
+ */ +static inline rsRetVal tdlAdd(qqueue_t *pQueue, qDeqID deqID, int nElemDeq) +{ + toDeleteLst_t *pNew; + toDeleteLst_t *pPrev; + DEFiRet; + + ISOBJ_TYPE_assert(pQueue, qqueue); + assert(pQueue->toDeleteLst != NULL); + + CHKmalloc(pNew = malloc(sizeof(toDeleteLst_t))); + pNew->deqID = deqID; + pNew->nElemDeq = nElemDeq; + + /* now find right spot */ + for( pPrev = pQueue->toDeleteLst + ; pPrev != NULL && deqID > pPrev->deqID + ; pPrev = pPrev->pNext) { + /*JUST SEARCH*/; + } + + if(pPrev == NULL) { + pNew->pNext = pQueue->toDeleteLst; + pQueue->toDeleteLst = pNew; + } else { + pNew->pNext = pPrev->pNext; + pPrev->pNext = pNew; + } + +finalize_it: + RETiRet; +} + + /* methods */ -/* get the overall queue size, which includes ungotten objects. Must only be called +/* get the physical queue size. Must only be called * while mutex is locked! * rgerhards, 2008-01-29 */ static inline int -qqueueGetOverallQueueSize(qqueue_t *pThis) +getPhysicalQueueSize(qqueue_t *pThis) { -#if 0 /* leave a bit in for debugging -- rgerhards, 2008-01-30 */ -BEGINfunc -dbgoprint((obj_t*) pThis, "queue size: %d (regular %d, ungotten %d)\n", - pThis->iQueueSize + pThis->iUngottenObjs, pThis->iQueueSize, pThis->iUngottenObjs); -ENDfunc -#endif - return pThis->iQueueSize + pThis->iUngottenObjs; + return pThis->iQueueSize; } +/* get the logical queue size (that is store size minus logically dequeued elements). + * Must only be called while mutex is locked! + * rgerhards, 2009-05-19 + */ +static inline int +getLogicalQueueSize(qqueue_t *pThis) +{ + return pThis->iQueueSize - pThis->nLogDeq; +} + + + /* This function drains the queue in cases where this needs to be done. The most probable * reason is a HUP which needs to discard data (because the queue is configured to be lossy). * During a shutdown, this is typically not needed, as the OS frees up ressources and does * this much quicker than when we clean up ourselvs. -- rgerhards, 2008-10-21 * This function returns void, as it makes no sense to communicate an error back, even if * it happens. + * This functions works "around" the regular deque mechanism, because it is only used to + * clean up (in cases where message loss is acceptable). */ static inline void queueDrain(qqueue_t *pThis) { void *pUsr; - ASSERT(pThis != NULL); + BEGINfunc + dbgoprint((obj_t*) pThis, "queue (type %d) will lose %d messages, destroying...\n", pThis->qType, pThis->iQueueSize); /* iQueueSize is not decremented by qDel(), so we need to do it ourselves */ while(ATOMIC_DEC_AND_FETCH(pThis->iQueueSize) > 0) { - pThis->qDel(pThis, &pUsr); + pThis->qDeq(pThis, &pUsr); if(pUsr != NULL) { objDestruct(pUsr); } + pThis->qDel(pThis); } + ENDfunc } @@ -138,17 +242,17 @@ static inline rsRetVal qqueueAdviseMaxWorkers(qqueue_t *pThis) /* if we have not yet reached the high water mark, there is no need to start a * worker. 
-- rgerhards, 2008-01-26 */ - if(qqueueGetOverallQueueSize(pThis) >= pThis->iHighWtrMrk || pThis->bQueueStarted == 0) { + if(getLogicalQueueSize(pThis) >= pThis->iHighWtrMrk || pThis->bQueueStarted == 0) { wtpAdviseMaxWorkers(pThis->pWtpDA, 1); /* disk queues have always one worker */ } + } + /* regular workers always run */ + if(pThis->qType == QUEUETYPE_DISK || pThis->iMinMsgsPerWrkr == 0) { + iMaxWorkers = 1; } else { - if(pThis->qType == QUEUETYPE_DISK || pThis->iMinMsgsPerWrkr == 0) { - iMaxWorkers = 1; - } else { - iMaxWorkers = qqueueGetOverallQueueSize(pThis) / pThis->iMinMsgsPerWrkr + 1; - } - wtpAdviseMaxWorkers(pThis->pWtpReg, iMaxWorkers); /* disk queues have always one worker */ + iMaxWorkers = getLogicalQueueSize(pThis) / pThis->iMinMsgsPerWrkr + 1; } + wtpAdviseMaxWorkers(pThis->pWtpReg, iMaxWorkers); /* disk queues have always one worker */ } RETiRet; @@ -156,7 +260,11 @@ static inline rsRetVal qqueueAdviseMaxWorkers(qqueue_t *pThis) /* wait until we have a fully initialized DA queue. Sometimes, we need to - * sync with it, as we expect it for some function. + * sync with it, as we expect it for some function. Note that in extreme + * cases, the DA queue may already have started up AND terminated when we + * call this function. As such,it may validly be that DA is already shut down. + * So we just check if we are in init phase and then wait for full startup. + * If in non-DA mode, we silently return. * rgerhards, 2008-02-27 */ static rsRetVal @@ -165,9 +273,8 @@ qqueueWaitDAModeInitialized(qqueue_t *pThis) DEFiRet; ISOBJ_TYPE_assert(pThis, qqueue); - ASSERT(pThis->bRunsDA); - while(pThis->bRunsDA != 2) { + while(pThis->bRunsDA == 1) { d_pthread_cond_wait(&pThis->condDAReady, pThis->mut); } @@ -186,45 +293,38 @@ qqueueWaitDAModeInitialized(qqueue_t *pThis) * rgerhards, 2008-01-15 */ static rsRetVal -qqueueTurnOffDAMode(qqueue_t *pThis) +TurnOffDAMode(qqueue_t *pThis) { DEFiRet; ISOBJ_TYPE_assert(pThis, qqueue); ASSERT(pThis->bRunsDA); - /* at this point, we need a fully initialized DA queue. So if it isn't, we finally need - * to wait for its startup... -- rgerhards, 2008-01-25 - */ - qqueueWaitDAModeInitialized(pThis); - /* if we need to pull any data that we still need from the (child) disk queue, * now would be the time to do so. At present, we do not need this, but I'd like to * keep that comment if future need arises. */ /* we need to check if the DA queue is empty because the DA worker may simply have - * terminated do to no new messages arriving. That does not, however, mean that the + * terminated due to no new messages arriving. That does not, however, mean that the * DA queue is empty. If there is still data in that queue, we do nothing and leave * that for a later incarnation of this function (it will be called multiple times * during the lifetime of DA-mode, depending on how often the DA worker receives an * inactivity timeout. -- rgerhards, 2008-01-25 */ - if(pThis->pqDA->iQueueSize == 0) { +dbgprintf("XXX: getLogicalQueueSize(pThis->pqDA): %d\n", getLogicalQueueSize(pThis->pqDA)); + if(getLogicalQueueSize(pThis->pqDA) == 0) { pThis->bRunsDA = 0; /* tell the world we are back in non-DA mode */ /* we destruct the queue object, which will also shutdown the queue worker. As the queue is empty, * this will be quick. 
*/ - qqueueDestruct(&pThis->pqDA); /* and now we are ready to destruct the DA queue */ +//XXX: TODO qqueueDestruct(&pThis->pqDA); /* and now we are ready to destruct the DA queue */ dbgoprint((obj_t*) pThis, "disk-assistance has been turned off, disk queue was empty (iRet %d)\n", iRet); - /* now we need to check if the regular queue has some messages. This may be the case - * when it is waiting that the high water mark is reached again. If so, we need to start up - * a regular worker. -- rgerhards, 2008-01-26 - */ - if(qqueueGetOverallQueueSize(pThis) > 0) { - qqueueAdviseMaxWorkers(pThis); - } + } else { + /* the queue has data again! */ + dbgprintf("DA queue has data during shutdown, restarting...\n"); + qqueueAdviseMaxWorkers(pThis->pqDA); } RETiRet; @@ -267,7 +367,7 @@ qqueueChkIsDA(qqueue_t *pThis) * rgerhards, 2008-01-15 */ static rsRetVal -qqueueStartDA(qqueue_t *pThis) +StartDA(qqueue_t *pThis) { DEFiRet; uchar pszDAQName[128]; @@ -298,11 +398,15 @@ qqueueStartDA(qqueue_t *pThis) CHKiRet(qqueueSetbSyncQueueFiles(pThis->pqDA, pThis->bSyncQueueFiles)); CHKiRet(qqueueSettoActShutdown(pThis->pqDA, pThis->toActShutdown)); CHKiRet(qqueueSettoEnq(pThis->pqDA, pThis->toEnq)); - CHKiRet(qqueueSetEnqOnly(pThis->pqDA, pThis->bDAEnqOnly, MUTEX_ALREADY_LOCKED)); + CHKiRet(SetEnqOnly(pThis->pqDA, pThis->bDAEnqOnly, MUTEX_ALREADY_LOCKED)); CHKiRet(qqueueSetiDeqtWinFromHr(pThis->pqDA, pThis->iDeqtWinFromHr)); CHKiRet(qqueueSetiDeqtWinToHr(pThis->pqDA, pThis->iDeqtWinToHr)); CHKiRet(qqueueSetiHighWtrMrk(pThis->pqDA, 0)); CHKiRet(qqueueSetiDiscardMrk(pThis->pqDA, 0)); + + // experimental: XXX + CHKiRet(qqueueSettoWrkShutdown(pThis->pqDA, 0)); + if(pThis->toQShutdown == 0) { CHKiRet(qqueueSettoQShutdown(pThis->pqDA, 0)); /* if the user really wants... */ } else { @@ -318,14 +422,6 @@ qqueueStartDA(qqueue_t *pThis) if(iRet != RS_RET_OK && iRet != RS_RET_FILE_NOT_FOUND) FINALIZE; /* something is wrong */ - /* as we are right now starting DA mode because we are so busy, it is - * extremely unlikely that any regular worker is sleeping on empty queue. HOWEVER, - * we want to be on the safe side, and so we awake anyone that is waiting - * on one. So even if the scheduler plays badly with us, things should be - * quite well. -- rgerhards, 2008-01-15 - */ - wtpWakeupWrkr(pThis->pWtpReg); /* awake all workers, but not ourselves ;) */ - pThis->bRunsDA = 2; /* we are now in DA mode, but not fully initialized */ pThis->bChildIsDone = 0;/* set to 1 when child's worker detect queue is finished */ pthread_cond_broadcast(&pThis->condDAReady); /* signal we are now initialized and ready to go ;) */ @@ -352,8 +448,8 @@ finalize_it: * If this function fails (should not happen), DA mode is not turned on. * rgerhards, 2008-01-16 */ -static inline rsRetVal -qqueueInitDA(qqueue_t *pThis, int bEnqOnly, int bLockMutex) +static rsRetVal +InitDA(qqueue_t *pThis, int bEnqOnly, int bLockMutex) { DEFiRet; DEFVARS_mutexProtection; @@ -366,17 +462,19 @@ qqueueInitDA(qqueue_t *pThis, int bEnqOnly, int bLockMutex) * is intentional. We assume that when we need it once, we may also need it on another * occasion. Ressources used are quite minimal when no worker is running. * rgerhards, 2008-01-24 + * NOTE: this is the DA worker *pool*, not the DA queue! 
*/ if(pThis->pWtpDA == NULL) { - lenBuf = snprintf((char*)pszBuf, sizeof(pszBuf), "%s:DA", obj.GetName((obj_t*) pThis)); + lenBuf = snprintf((char*)pszBuf, sizeof(pszBuf), "%s:DAwpool", obj.GetName((obj_t*) pThis)); CHKiRet(wtpConstruct (&pThis->pWtpDA)); CHKiRet(wtpSetDbgHdr (pThis->pWtpDA, pszBuf, lenBuf)); CHKiRet(wtpSetpfChkStopWrkr (pThis->pWtpDA, (rsRetVal (*)(void *pUsr, int)) qqueueChkStopWrkrDA)); - CHKiRet(wtpSetpfIsIdle (pThis->pWtpDA, (rsRetVal (*)(void *pUsr, int)) qqueueIsIdleDA)); - CHKiRet(wtpSetpfDoWork (pThis->pWtpDA, (rsRetVal (*)(void *pUsr, void *pWti, int)) qqueueConsumerDA)); - CHKiRet(wtpSetpfOnWorkerCancel (pThis->pWtpDA, (rsRetVal (*)(void *pUsr, void*pWti)) qqueueConsumerCancelCleanup)); - CHKiRet(wtpSetpfOnWorkerStartup (pThis->pWtpDA, (rsRetVal (*)(void *pUsr)) qqueueStartDA)); - CHKiRet(wtpSetpfOnWorkerShutdown(pThis->pWtpDA, (rsRetVal (*)(void *pUsr)) qqueueTurnOffDAMode)); + CHKiRet(wtpSetpfGetDeqBatchSize (pThis->pWtpDA, (rsRetVal (*)(void *pUsr, int*)) GetDeqBatchSize)); + CHKiRet(wtpSetpfIsIdle (pThis->pWtpDA, (rsRetVal (*)(void *pUsr, wtp_t*)) qqueueIsIdleDA)); + CHKiRet(wtpSetpfDoWork (pThis->pWtpDA, (rsRetVal (*)(void *pUsr, void *pWti, int)) ConsumerDA)); + CHKiRet(wtpSetpfObjProcessed (pThis->pWtpDA, (rsRetVal (*)(void *pUsr, wti_t *pWti)) batchProcessed)); + CHKiRet(wtpSetpfOnWorkerStartup (pThis->pWtpDA, (rsRetVal (*)(void *pUsr)) StartDA)); + CHKiRet(wtpSetpfOnWorkerShutdown(pThis->pWtpDA, (rsRetVal (*)(void *pUsr)) TurnOffDAMode)); CHKiRet(wtpSetpmutUsr (pThis->pWtpDA, pThis->mut)); CHKiRet(wtpSetpcondBusy (pThis->pWtpDA, &pThis->notEmpty)); CHKiRet(wtpSetiNumWorkerThreads (pThis->pWtpDA, 1)); @@ -394,7 +492,7 @@ qqueueInitDA(qqueue_t *pThis, int bEnqOnly, int bLockMutex) * that will also start one up. If we forgot that step, everything would be stalled * until the next enqueue request. */ - wtpAdviseMaxWorkers(pThis->pWtpDA, 1); /* DA queues alsways have just one worker max */ + wtpAdviseMaxWorkers(pThis->pWtpDA, 1); /* DA queues always have just one worker max */ finalize_it: END_MTX_PROTECTED_OPERATIONS(pThis->mut); @@ -408,15 +506,15 @@ finalize_it: * complete. * rgerhards, 2008-01-14 */ -static inline rsRetVal -qqueueChkStrtDA(qqueue_t *pThis) +static rsRetVal +ChkStrtDA(qqueue_t *pThis) { DEFiRet; ISOBJ_TYPE_assert(pThis, qqueue); /* if we do not hit the high water mark, we have nothing to do */ - if(qqueueGetOverallQueueSize(pThis) != pThis->iHighWtrMrk) + if(getPhysicalQueueSize(pThis) != pThis->iHighWtrMrk) ABORT_FINALIZE(RS_RET_OK); if(pThis->bRunsDA) { @@ -430,15 +528,15 @@ qqueueChkStrtDA(qqueue_t *pThis) * we need at least one). */ dbgoprint((obj_t*) pThis, "%d entries - passed high water mark in DA mode, send notify\n", - qqueueGetOverallQueueSize(pThis)); + getPhysicalQueueSize(pThis)); qqueueAdviseMaxWorkers(pThis); } else { /* this is the case when we are currently not running in DA mode. So it is time * to turn it back on. 
*/ dbgoprint((obj_t*) pThis, "%d entries - passed high water mark for disk-assisted mode, initiating...\n", - qqueueGetOverallQueueSize(pThis)); - qqueueInitDA(pThis, QUEUE_MODE_ENQDEQ, MUTEX_ALREADY_LOCKED); /* initiate DA mode */ + getPhysicalQueueSize(pThis)); + InitDA(pThis, QUEUE_MODE_ENQDEQ, MUTEX_ALREADY_LOCKED); /* initiate DA mode */ } finalize_it: @@ -469,6 +567,7 @@ static rsRetVal qConstructFixedArray(qqueue_t *pThis) ABORT_FINALIZE(RS_RET_OUT_OF_MEMORY); } + pThis->tVars.farray.deqhead = 0; pThis->tVars.farray.head = 0; pThis->tVars.farray.tail = 0; @@ -486,9 +585,7 @@ static rsRetVal qDestructFixedArray(qqueue_t *pThis) ASSERT(pThis != NULL); queueDrain(pThis); /* discard any remaining queue entries */ - - if(pThis->tVars.farray.pBuf != NULL) - free(pThis->tVars.farray.pBuf); + free(pThis->tVars.farray.pBuf); RETiRet; } @@ -507,76 +604,57 @@ static rsRetVal qAddFixedArray(qqueue_t *pThis, void* in) RETiRet; } -static rsRetVal qDelFixedArray(qqueue_t *pThis, void **out) + +static rsRetVal qDeqFixedArray(qqueue_t *pThis, void **out) { DEFiRet; ASSERT(pThis != NULL); - *out = (void*) pThis->tVars.farray.pBuf[pThis->tVars.farray.head]; + *out = (void*) pThis->tVars.farray.pBuf[pThis->tVars.farray.deqhead]; - pThis->tVars.farray.head++; - if (pThis->tVars.farray.head == pThis->iMaxQueueSize) - pThis->tVars.farray.head = 0; + pThis->tVars.farray.deqhead++; + if (pThis->tVars.farray.deqhead == pThis->iMaxQueueSize) + pThis->tVars.farray.deqhead = 0; RETiRet; } -/* -------------------- linked list -------------------- */ - -/* first some generic functions which are also used for the unget linked list */ - -static inline rsRetVal qqueueAddLinkedList(qLinkedList_t **ppRoot, qLinkedList_t **ppLast, void* pUsr) +static rsRetVal qDelFixedArray(qqueue_t *pThis) { DEFiRet; - qLinkedList_t *pEntry; - - ASSERT(ppRoot != NULL); - ASSERT(ppLast != NULL); - if((pEntry = (qLinkedList_t*) malloc(sizeof(qLinkedList_t))) == NULL) { - ABORT_FINALIZE(RS_RET_OUT_OF_MEMORY); - } - - pEntry->pNext = NULL; - pEntry->pUsr = pUsr; + ASSERT(pThis != NULL); - if(*ppRoot == NULL) { - *ppRoot = *ppLast = pEntry; - } else { - (*ppLast)->pNext = pEntry; - *ppLast = pEntry; - } + pThis->tVars.farray.head++; + if (pThis->tVars.farray.head == pThis->iMaxQueueSize) + pThis->tVars.farray.head = 0; -finalize_it: RETiRet; } -static inline rsRetVal qqueueDelLinkedList(qLinkedList_t **ppRoot, qLinkedList_t **ppLast, obj_t **ppUsr) + +/* reset the logical dequeue pointer to the physical dequeue position. + * This is only needed after we cancelled workers (during queue shutdown). 
+ */ +static rsRetVal +qUnDeqAllFixedArray(qqueue_t *pThis) { DEFiRet; - qLinkedList_t *pEntry; - ASSERT(ppRoot != NULL); - ASSERT(ppLast != NULL); - ASSERT(ppUsr != NULL); - ASSERT(*ppRoot != NULL); - - pEntry = *ppRoot; - *ppUsr = pEntry->pUsr; + ISOBJ_TYPE_assert(pThis, qqueue); - if(*ppRoot == *ppLast) { - *ppRoot = NULL; - *ppLast = NULL; - } else { - *ppRoot = pEntry->pNext; - } - free(pEntry); + dbgoprint((obj_t*) pThis, "resetting FixedArray deq index to %ld (was %ld), logical dequeue count %d\n", + pThis->tVars.farray.head, pThis->tVars.farray.deqhead, pThis->nLogDeq); + + pThis->tVars.farray.deqhead = pThis->tVars.farray.head; + pThis->nLogDeq = 0; RETiRet; } -/* end generic functions which are also used for the unget linked list */ + +/* -------------------- linked list -------------------- */ static rsRetVal qConstructLinkedList(qqueue_t *pThis) @@ -585,8 +663,9 @@ static rsRetVal qConstructLinkedList(qqueue_t *pThis) ASSERT(pThis != NULL); - pThis->tVars.linklist.pRoot = 0; - pThis->tVars.linklist.pLast = 0; + pThis->tVars.linklist.pDeqRoot = NULL; + pThis->tVars.linklist.pDelRoot = NULL; + pThis->tVars.linklist.pLast = NULL; qqueueChkIsDA(pThis); @@ -609,54 +688,79 @@ static rsRetVal qDestructLinkedList(qqueue_t __attribute__((unused)) *pThis) static rsRetVal qAddLinkedList(qqueue_t *pThis, void* pUsr) { - DEFiRet; - - iRet = qqueueAddLinkedList(&pThis->tVars.linklist.pRoot, &pThis->tVars.linklist.pLast, pUsr); -#if 0 qLinkedList_t *pEntry; + DEFiRet; - ASSERT(pThis != NULL); - if((pEntry = (qLinkedList_t*) malloc(sizeof(qLinkedList_t))) == NULL) { - ABORT_FINALIZE(RS_RET_OUT_OF_MEMORY); - } + CHKmalloc((pEntry = (qLinkedList_t*) malloc(sizeof(qLinkedList_t)))); pEntry->pNext = NULL; pEntry->pUsr = pUsr; - if(pThis->tVars.linklist.pRoot == NULL) { - pThis->tVars.linklist.pRoot = pThis->tVars.linklist.pLast = pEntry; + if(pThis->tVars.linklist.pDelRoot == NULL) { + pThis->tVars.linklist.pDelRoot = pThis->tVars.linklist.pDeqRoot = pThis->tVars.linklist.pLast = pEntry; } else { pThis->tVars.linklist.pLast->pNext = pEntry; pThis->tVars.linklist.pLast = pEntry; } + if(pThis->tVars.linklist.pDeqRoot == NULL) { + pThis->tVars.linklist.pDeqRoot = pEntry; + } + finalize_it: -#endif RETiRet; } -static rsRetVal qDelLinkedList(qqueue_t *pThis, obj_t **ppUsr) + +static rsRetVal qDeqLinkedList(qqueue_t *pThis, obj_t **ppUsr) { - DEFiRet; - iRet = qqueueDelLinkedList(&pThis->tVars.linklist.pRoot, &pThis->tVars.linklist.pLast, ppUsr); -#if 0 qLinkedList_t *pEntry; + DEFiRet; - ASSERT(pThis != NULL); - ASSERT(pThis->tVars.linklist.pRoot != NULL); - - pEntry = pThis->tVars.linklist.pRoot; + pEntry = pThis->tVars.linklist.pDeqRoot; + ISOBJ_TYPE_assert(pEntry->pUsr, msg); *ppUsr = pEntry->pUsr; + pThis->tVars.linklist.pDeqRoot = pEntry->pNext; + + RETiRet; +} + - if(pThis->tVars.linklist.pRoot == pThis->tVars.linklist.pLast) { - pThis->tVars.linklist.pRoot = NULL; - pThis->tVars.linklist.pLast = NULL; +static rsRetVal qDelLinkedList(qqueue_t *pThis) +{ + qLinkedList_t *pEntry; + DEFiRet; + + pEntry = pThis->tVars.linklist.pDelRoot; + + if(pThis->tVars.linklist.pDelRoot == pThis->tVars.linklist.pLast) { + pThis->tVars.linklist.pDelRoot = pThis->tVars.linklist.pDeqRoot = pThis->tVars.linklist.pLast = NULL; } else { - pThis->tVars.linklist.pRoot = pEntry->pNext; + pThis->tVars.linklist.pDelRoot = pEntry->pNext; } + free(pEntry); -#endif + RETiRet; +} + + +/* reset the logical dequeue pointer to the physical dequeue position. 
+ * This is only needed after we cancelled workers (during queue shutdown). + */ +static rsRetVal +qUnDeqAllLinkedList(qqueue_t *pThis) +{ + DEFiRet; + + ASSERT(pThis != NULL); + + dbgoprint((obj_t*) pThis, "resetting LinkedList deq ptr to %p (was %p), logical dequeue count %d\n", + pThis->tVars.linklist.pDelRoot, pThis->tVars.linklist.pDeqRoot, pThis->nLogDeq); + + pThis->tVars.linklist.pDeqRoot = pThis->tVars.linklist.pDelRoot; + pThis->nLogDeq = 0; + RETiRet; } @@ -725,8 +829,6 @@ qqueueTryLoadPersistedInfo(qqueue_t *pThis) uchar pszQIFNam[MAXFNAME]; size_t lenQIFNam; struct stat stat_buf; - int iUngottenObjs; - obj_t *pUsr; ISOBJ_TYPE_assert(pThis, qqueue); @@ -756,25 +858,22 @@ qqueueTryLoadPersistedInfo(qqueue_t *pThis) /* first, we try to read the property bag for ourselfs */ CHKiRet(obj.DeserializePropBag((obj_t*) pThis, psQIF)); - /* then the ungotten object queue */ - iUngottenObjs = pThis->iUngottenObjs; - pThis->iUngottenObjs = 0; /* will be incremented when we add objects! */ - - while(iUngottenObjs > 0) { - /* fill the queue from disk */ - CHKiRet(obj.Deserialize((void*) &pUsr, (uchar*)"msg", psQIF, NULL, NULL)); - qqueueUngetObj(pThis, pUsr, MUTEX_ALREADY_LOCKED); - --iUngottenObjs; /* one less */ - } - - /* and now the stream objects (some order as when persisted!) */ + /* then the stream objects (same order as when persisted!) */ CHKiRet(obj.Deserialize(&pThis->tVars.disk.pWrite, (uchar*) "strm", psQIF, (rsRetVal(*)(obj_t*,void*))qqueueLoadPersStrmInfoFixup, pThis)); - CHKiRet(obj.Deserialize(&pThis->tVars.disk.pRead, (uchar*) "strm", psQIF, + CHKiRet(obj.Deserialize(&pThis->tVars.disk.pReadDel, (uchar*) "strm", psQIF, (rsRetVal(*)(obj_t*,void*))qqueueLoadPersStrmInfoFixup, pThis)); + /* create a duplicate for the read "pointer". + */ + + CHKiRet(strm.Dup(pThis->tVars.disk.pReadDel, &pThis->tVars.disk.pReadDeq)); + CHKiRet(strm.SetbDeleteOnClose(pThis->tVars.disk.pReadDeq, 0)); /* deq must NOT delete the files! */ + CHKiRet(strm.ConstructFinalize(pThis->tVars.disk.pReadDeq)); + CHKiRet(strm.SeekCurrOffs(pThis->tVars.disk.pWrite)); - CHKiRet(strm.SeekCurrOffs(pThis->tVars.disk.pRead)); + CHKiRet(strm.SeekCurrOffs(pThis->tVars.disk.pReadDel)); + CHKiRet(strm.SeekCurrOffs(pThis->tVars.disk.pReadDeq)); /* OK, we could successfully read the file, so we now can request that it be * deleted when we are done with the persisted information. 
@@ -826,18 +925,26 @@ static rsRetVal qConstructDisk(qqueue_t *pThis) CHKiRet(strm.SetsType(pThis->tVars.disk.pWrite, STREAMTYPE_FILE_CIRCULAR)); CHKiRet(strm.ConstructFinalize(pThis->tVars.disk.pWrite)); - CHKiRet(strm.Construct(&pThis->tVars.disk.pRead)); - CHKiRet(strm.SetbSync(pThis->tVars.disk.pRead, pThis->bSyncQueueFiles)); - CHKiRet(strm.SetbDeleteOnClose(pThis->tVars.disk.pRead, 1)); - CHKiRet(strm.SetDir(pThis->tVars.disk.pRead, glbl.GetWorkDir(), strlen((char*)glbl.GetWorkDir()))); - CHKiRet(strm.SetiMaxFiles(pThis->tVars.disk.pRead, 10000000)); - CHKiRet(strm.SettOperationsMode(pThis->tVars.disk.pRead, STREAMMODE_READ)); - CHKiRet(strm.SetsType(pThis->tVars.disk.pRead, STREAMTYPE_FILE_CIRCULAR)); - CHKiRet(strm.ConstructFinalize(pThis->tVars.disk.pRead)); + CHKiRet(strm.Construct(&pThis->tVars.disk.pReadDeq)); + CHKiRet(strm.SetbDeleteOnClose(pThis->tVars.disk.pReadDeq, 0)); + CHKiRet(strm.SetDir(pThis->tVars.disk.pReadDeq, glbl.GetWorkDir(), strlen((char*)glbl.GetWorkDir()))); + CHKiRet(strm.SetiMaxFiles(pThis->tVars.disk.pReadDeq, 10000000)); + CHKiRet(strm.SettOperationsMode(pThis->tVars.disk.pReadDeq, STREAMMODE_READ)); + CHKiRet(strm.SetsType(pThis->tVars.disk.pReadDeq, STREAMTYPE_FILE_CIRCULAR)); + CHKiRet(strm.ConstructFinalize(pThis->tVars.disk.pReadDeq)); + CHKiRet(strm.Construct(&pThis->tVars.disk.pReadDel)); + CHKiRet(strm.SetbSync(pThis->tVars.disk.pReadDel, pThis->bSyncQueueFiles)); + CHKiRet(strm.SetbDeleteOnClose(pThis->tVars.disk.pReadDel, 1)); + CHKiRet(strm.SetDir(pThis->tVars.disk.pReadDel, glbl.GetWorkDir(), strlen((char*)glbl.GetWorkDir()))); + CHKiRet(strm.SetiMaxFiles(pThis->tVars.disk.pReadDel, 10000000)); + CHKiRet(strm.SettOperationsMode(pThis->tVars.disk.pReadDel, STREAMMODE_READ)); + CHKiRet(strm.SetsType(pThis->tVars.disk.pReadDel, STREAMTYPE_FILE_CIRCULAR)); + CHKiRet(strm.ConstructFinalize(pThis->tVars.disk.pReadDel)); - CHKiRet(strm.SetFName(pThis->tVars.disk.pWrite, pThis->pszFilePrefix, pThis->lenFilePrefix)); - CHKiRet(strm.SetFName(pThis->tVars.disk.pRead, pThis->pszFilePrefix, pThis->lenFilePrefix)); + CHKiRet(strm.SetFName(pThis->tVars.disk.pWrite, pThis->pszFilePrefix, pThis->lenFilePrefix)); + CHKiRet(strm.SetFName(pThis->tVars.disk.pReadDeq, pThis->pszFilePrefix, pThis->lenFilePrefix)); + CHKiRet(strm.SetFName(pThis->tVars.disk.pReadDel, pThis->pszFilePrefix, pThis->lenFilePrefix)); } /* now we set (and overwrite in case of a persisted restart) some parameters which @@ -846,7 +953,8 @@ static rsRetVal qConstructDisk(qqueue_t *pThis) * ability to read existing queue files. 
-- rgerhards, 2008-01-12 */ CHKiRet(strm.SetiMaxFileSize(pThis->tVars.disk.pWrite, pThis->iMaxFileSize)); - CHKiRet(strm.SetiMaxFileSize(pThis->tVars.disk.pRead, pThis->iMaxFileSize)); + CHKiRet(strm.SetiMaxFileSize(pThis->tVars.disk.pReadDeq, pThis->iMaxFileSize)); + CHKiRet(strm.SetiMaxFileSize(pThis->tVars.disk.pReadDel, pThis->iMaxFileSize)); finalize_it: RETiRet; @@ -860,7 +968,8 @@ static rsRetVal qDestructDisk(qqueue_t *pThis) ASSERT(pThis != NULL); strm.Destruct(&pThis->tVars.disk.pWrite); - strm.Destruct(&pThis->tVars.disk.pRead); + strm.Destruct(&pThis->tVars.disk.pReadDeq); + strm.Destruct(&pThis->tVars.disk.pReadDel); RETiRet; } @@ -892,16 +1001,30 @@ finalize_it: RETiRet; } -static rsRetVal qDelDisk(qqueue_t *pThis, void **ppUsr) + +static rsRetVal qDeqDisk(qqueue_t *pThis, void **ppUsr) { DEFiRet; + CHKiRet(obj.Deserialize(ppUsr, (uchar*) "msg", pThis->tVars.disk.pReadDeq, NULL, NULL)); + +finalize_it: + RETiRet; +} + + +static rsRetVal qDelDisk(qqueue_t *pThis) +{ + obj_t *pDummyObj; /* we need to deserialize it... */ + DEFiRet; + int64 offsIn; int64 offsOut; - CHKiRet(strm.GetCurrOffset(pThis->tVars.disk.pRead, &offsIn)); - CHKiRet(obj.Deserialize(ppUsr, (uchar*) "msg", pThis->tVars.disk.pRead, NULL, NULL)); - CHKiRet(strm.GetCurrOffset(pThis->tVars.disk.pRead, &offsOut)); + CHKiRet(strm.GetCurrOffset(pThis->tVars.disk.pReadDel, &offsIn)); + CHKiRet(obj.Deserialize(&pDummyObj, (uchar*) "msg", pThis->tVars.disk.pReadDel, NULL, NULL)); + objDestruct(pDummyObj); + CHKiRet(strm.GetCurrOffset(pThis->tVars.disk.pReadDel, &offsOut)); /* This time it is a bit tricky: we free disk space only upon file deletion. So we need * to keep track of what we have read until we get an out-offset that is lower than the @@ -922,6 +1045,17 @@ finalize_it: RETiRet; } + +/* This is a dummy function for disks - we do not need to reset anything + * because everything is already persisted... + */ +static rsRetVal +qUnDeqAllDisk(__attribute__((unused)) qqueue_t *pThis) +{ + return RS_RET_OK; +} + + /* -------------------- direct (no queueing) -------------------- */ static rsRetVal qConstructDirect(qqueue_t __attribute__((unused)) *pThis) { @@ -936,6 +1070,8 @@ static rsRetVal qDestructDirect(qqueue_t __attribute__((unused)) *pThis) static rsRetVal qAddDirect(qqueue_t *pThis, void* pUsr) { + batch_t singleBatch; + batch_obj_t batchObj; DEFiRet; ASSERT(pThis != NULL); @@ -945,70 +1081,33 @@ static rsRetVal qAddDirect(qqueue_t *pThis, void* pUsr) * mode the consumer probably has a lot to convey (which get's lost in the other modes * because they are asynchronous. But direct mode is deliberately synchronous. * rgerhards, 2008-02-12 + * We use our knowledge about the batch_t structure below, but without that, we + * pay a too-large performance toll... -- rgerhards, 2009-04-22 */ - iRet = pThis->pConsumer(pThis->pUsr, pUsr); + batchObj.state = BATCH_STATE_RDY; + batchObj.pUsrp = (obj_t*) pUsr; + singleBatch.nElem = 1; /* there always is only one in direct mode */ + singleBatch.pElem = &batchObj; + iRet = pThis->pConsumer(pThis->pUsr, &singleBatch); + objDestruct(pUsr); RETiRet; } -static rsRetVal qDelDirect(qqueue_t __attribute__((unused)) *pThis, __attribute__((unused)) void **out) + +static rsRetVal qDelDirect(qqueue_t __attribute__((unused)) *pThis) { return RS_RET_OK; } - -/* --------------- end type-specific handlers -------------------- */ - - -/* unget a user pointer that has been dequeued. This functionality is especially important - * for consumer cancel cleanup handlers. 
To support it, a short list of ungotten user pointers - * is maintened in memory. - * rgerhards, 2008-01-20 - */ static rsRetVal -qqueueUngetObj(qqueue_t *pThis, obj_t *pUsr, int bLockMutex) +qUnDeqAllDirect(__attribute__((unused)) qqueue_t *pThis) { - DEFiRet; - DEFVARS_mutexProtection; - - ISOBJ_TYPE_assert(pThis, qqueue); - ISOBJ_assert(pUsr); /* TODO: we aborted right at this place at least 3 times -- race? 2008-02-28, -03-10, -03-15 - The second time I noticed it the queue was in destruction with NO worker threads - running. The pUsr ptr was totally off and provided no clue what it may be pointing - at (except that it looked like the static data pool). Both times, the abort happend - inside an action queue */ - - dbgoprint((obj_t*) pThis, "ungetting user object %s\n", obj.GetName(pUsr)); - BEGIN_MTX_PROTECTED_OPERATIONS(pThis->mut, bLockMutex); - iRet = qqueueAddLinkedList(&pThis->pUngetRoot, &pThis->pUngetLast, pUsr); - ++pThis->iUngottenObjs; /* indicate one more */ - END_MTX_PROTECTED_OPERATIONS(pThis->mut); - - RETiRet; + return RS_RET_OK; } -/* dequeues a user pointer from the ungotten queue. Pointers from there should always be - * dequeued first. - * - * This function must only be called when the mutex is locked! - * - * rgerhards, 2008-01-29 - */ -static rsRetVal -qqueueGetUngottenObj(qqueue_t *pThis, obj_t **ppUsr) -{ - DEFiRet; - - ISOBJ_TYPE_assert(pThis, qqueue); - ASSERT(ppUsr != NULL); - - iRet = qqueueDelLinkedList(&pThis->pUngetRoot, &pThis->pUngetLast, ppUsr); - --pThis->iUngottenObjs; /* indicate one less */ - dbgoprint((obj_t*) pThis, "dequeued ungotten user object %s\n", obj.GetName(*ppUsr)); - - RETiRet; -} +/* --------------- end type-specific handlers -------------------- */ /* generic code to add a queue entry @@ -1027,7 +1126,8 @@ qqueueAdd(qqueue_t *pThis, void *pUsr) if(pThis->qType != QUEUETYPE_DIRECT) { ATOMIC_INC(pThis->iQueueSize); - dbgoprint((obj_t*) pThis, "entry added, size now %d entries\n", pThis->iQueueSize); + dbgoprint((obj_t*) pThis, "entry added, size now log %d, phys %d entries\n", + getLogicalQueueSize(pThis), getPhysicalQueueSize(pThis)); } finalize_it: @@ -1035,12 +1135,10 @@ finalize_it: } -/* generic code to remove a queue entry - * rgerhards, 2008-01-29: we must first see if there is any object in the - * ungotten list and, if so, dequeue it first. +/* generic code to dequeue a queue entry */ static rsRetVal -qqueueDel(qqueue_t *pThis, void *pUsr) +qqueueDeq(qqueue_t *pThis, void **ppUsr) { DEFiRet; @@ -1051,53 +1149,37 @@ qqueueDel(qqueue_t *pThis, void *pUsr) * If we decrement, however, we may lose a message. But that is better than * losing the whole process because it loops... -- rgerhards, 2008-01-03 */ - if(pThis->iUngottenObjs > 0) { - iRet = qqueueGetUngottenObj(pThis, (obj_t**) pUsr); - } else { - iRet = pThis->qDel(pThis, pUsr); - ATOMIC_DEC(pThis->iQueueSize); - } + iRet = pThis->qDeq(pThis, ppUsr); + ATOMIC_INC(pThis->nLogDeq); - dbgoprint((obj_t*) pThis, "entry deleted, state %d, size now %d entries\n", - iRet, pThis->iQueueSize); +// dbgoprint((obj_t*) pThis, "entry deleted, size now log %d, phys %d entries\n", +// getLogicalQueueSize(pThis), getPhysicalQueueSize(pThis)); RETiRet; } -/* This function shuts down all worker threads and waits until they - * have terminated. If they timeout, they are cancelled. Parameters have been set - * before this function is called so that DA queues will be fully persisted to - * disk (if configured to do so). 
- * rgerhards, 2008-01-24 - * Please note that this function shuts down BOTH the parent AND the child queue - * in DA case. This is necessary because their timeouts are tightly coupled. Most - * importantly, the timeouts would be applied twice (or logic be extremely - * complex) if each would have its own shutdown. The function does not self check - * this condition - the caller must make sure it is not called with a parent. +/* Try to terminate queue worker threads within the regular shutdown interval. + * Both the regular and DA queue (if it exists) is waited for, but on the same timeout. + * After this function returns, the workers must either be finished or some force + * to finish them must be applied. + * This function also instructs the DA worker pool (if it exists) to terminate. This is done + * in preparation of final queue shutdown. + * rgerhards, 2009-05-27 */ -static rsRetVal qqueueShutdownWorkers(qqueue_t *pThis) +static rsRetVal +tryShutdownWorkersWithinQueueTimeout(qqueue_t *pThis) { - DEFiRet; - DEFVARS_mutexProtection; + DEFVARS_mutexProtection_uncond; struct timespec tTimeout; rsRetVal iRetLocal; + DEFiRet; ISOBJ_TYPE_assert(pThis, qqueue); ASSERT(pThis->pqParent == NULL); /* detect invalid calling sequence */ - dbgoprint((obj_t*) pThis, "initiating worker thread shutdown sequence\n"); - - /* we reduce the low water mark in any case. This is not absolutely necessary, but - * it is useful because we enable DA mode at several spots below and so we do not need - * to think about the low water mark each time. - */ - pThis->iHighWtrMrk = 1; /* if we do not do this, the DA queue will not stop! */ - pThis->iLowWtrMrk = 0; - - /* first try to shutdown the queue within the regular shutdown period */ - BEGIN_MTX_PROTECTED_OPERATIONS(pThis->mut, LOCK_MUTEX); /* some workers may be running in parallel! */ - if(qqueueGetOverallQueueSize(pThis) > 0) { + BEGIN_MTX_PROTECTED_OPERATIONS_UNCOND(pThis->mut); /* some workers may be running in parallel! */ + if(getPhysicalQueueSize(pThis) > 0) { if(pThis->bRunsDA) { /* We may have waited on the low water mark. As it may have changed, we * see if we reactivate the worker. @@ -1105,7 +1187,12 @@ static rsRetVal qqueueShutdownWorkers(qqueue_t *pThis) wtpAdviseMaxWorkers(pThis->pWtpDA, 1); } } - END_MTX_PROTECTED_OPERATIONS(pThis->mut); + END_MTX_PROTECTED_OPERATIONS_UNCOND(pThis->mut); + + /* at this stage, we need to have the DA worker properly initialized and running (if there is one) */ + if(pThis->bRunsDA) { + qqueueWaitDAModeInitialized(pThis); + } /* Now wait for the queue's workers to shut down. Note that we run into the code even if we just found * out there are no active workers - that doesn't matter: the wtp knows about that and so will @@ -1129,116 +1216,118 @@ static rsRetVal qqueueShutdownWorkers(qqueue_t *pThis) if(iRetLocal == RS_RET_TIMED_OUT) { dbgoprint((obj_t*) pThis, "regular shutdown timed out on primary queue (this is OK)\n"); } else { - /* OK, the regular queue is now shut down. So we can now wait for the DA queue (if running DA) */ dbgoprint((obj_t*) pThis, "regular queue workers shut down.\n"); - BEGIN_MTX_PROTECTED_OPERATIONS(pThis->mut, LOCK_MUTEX); /* some workers may be running in parallel! */ - if(pThis->bRunsDA) { - END_MTX_PROTECTED_OPERATIONS(pThis->mut); - dbgoprint((obj_t*) pThis, "we have a DA queue (0x%lx), requesting its shutdown.\n", - qqueueGetID(pThis->pqDA)); - /* we use the same absolute timeout as above, so we do not use more than the configured - * timeout interval! 
- */ - dbgoprint((obj_t*) pThis, "trying shutdown of DA workers\n"); - iRetLocal = wtpShutdownAll(pThis->pWtpDA, wtpState_SHUTDOWN, &tTimeout); - if(iRetLocal == RS_RET_TIMED_OUT) { - dbgoprint((obj_t*) pThis, "shutdown timed out on DA queue (this is OK)\n"); - } + } + + /* OK, the worker for the regular queue is processed, on the the DA queue regular worker. */ + if(pThis->bRunsDA) { + dbgoprint((obj_t*) pThis, "we have a DA queue (0x%lx), requesting its shutdown.\n", + qqueueGetID(pThis->pqDA)); + /* we use the same absolute timeout as above, so we do not use more than the configured + * timeout interval! + */ + dbgoprint((obj_t*) pThis, "trying shutdown of regular worker of DA queue\n"); + iRetLocal = wtpShutdownAll(pThis->pqDA->pWtpReg, wtpState_SHUTDOWN, &tTimeout); + if(iRetLocal == RS_RET_TIMED_OUT) { + dbgoprint((obj_t*) pThis, "shutdown timed out on DA queue worker (this is OK)\n"); + } else { + dbgoprint((obj_t*) pThis, "DA queue worker shut down.\n"); + } + /* we also instruct the DA worker pool to shutdown ASAP. If we need it for persisting + * the queue, it is restarted at a later stage. We don't care here if a timeout happens. + */ + dbgoprint((obj_t*) pThis, "trying shutdown of regular worker of DA queue\n"); + iRetLocal = wtpShutdownAll(pThis->pWtpDA, wtpState_SHUTDOWN_IMMEDIATE, &tTimeout); + if(iRetLocal == RS_RET_TIMED_OUT) { + dbgoprint((obj_t*) pThis, "shutdown timed out on main queue DA worker pool (this is OK)\n"); } else { - END_MTX_PROTECTED_OPERATIONS(pThis->mut); + dbgoprint((obj_t*) pThis, "main queue DA worker pool shut down.\n"); } } - /* when we reach this point, both queues are either empty or the regular queue shutdown timeout - * has expired. Now we need to check if we are configured to not loose messages. If so, we need - * to persist the queue to disk (this is only possible if the queue is DA-enabled). We must also - * set the primary queue to SHUTDOWN_IMMEDIATE, as it shall now terminate as soon as its consumer - * is done. This is especially important as we otherwise may interfere with queue order while the - * DA consumer is running. -- rgerhards, 2008-01-27 - * Note: there was a note that we should not wait eternally on the DA worker if we run in - * enqueue-only note. I have reviewed the code and think there is no need for this check. Howerver, - * I'd like to keep this note in here should we happen to run into some related trouble. - * rgerhards, 2008-01-28 - */ - wtpSetState(pThis->pWtpReg, wtpState_SHUTDOWN_IMMEDIATE); /* set primary queue to shutdown only */ + RETiRet; +} - /* at this stage, we need to have the DA worker properly initialized and running (if there is one) */ - if(pThis->bRunsDA) - qqueueWaitDAModeInitialized(pThis); +/* Try to shut down regular and DA queue workers, within the action timeout + * period. Note that the main queue DA worker is still unaffected (and may shuffle + * data to the disk queue while we terminate the other workers). Not finishing + * processing all messages is now OK (but they may be preserved later, depending + * on bSaveOnShutdown setting). 
+ * rgerhards, 2009-05-27 + */ +static rsRetVal +tryShutdownWorkersWithinActionTimeout(qqueue_t *pThis) +{ + DEFVARS_mutexProtection; + struct timespec tTimeout; + rsRetVal iRetLocal; + DEFiRet; + + ISOBJ_TYPE_assert(pThis, qqueue); + ASSERT(pThis->pqParent == NULL); /* detect invalid calling sequence */ + + /* instruct workers to finish ASAP, even if still work exists */ + /* note that we modify bEnqOnly direclty, because going through the method would + * startup some workers again. So this is OK here. -- rgerhards, 2009-05-28 + */ + pThis->bEnqOnly = 1; + wtpSetState(pThis->pWtpReg, wtpState_SHUTDOWN_IMMEDIATE); + if(pThis->pqDA != NULL) { + pThis->pqDA->bEnqOnly = 1; + wtpSetState(pThis->pqDA->pWtpReg, wtpState_SHUTDOWN_IMMEDIATE); + } + + /* now give the queue workers a last chance to gracefully shut down (based on action timeout setting) */ + timeoutComp(&tTimeout, pThis->toActShutdown); BEGIN_MTX_PROTECTED_OPERATIONS(pThis->mut, LOCK_MUTEX); /* some workers may be running in parallel! */ - /* optimize parameters for shutdown of DA-enabled queues */ - if(pThis->bIsDA && qqueueGetOverallQueueSize(pThis) > 0 && pThis->bSaveOnShutdown) { - /* switch to enqueue-only mode so that no more actions happen */ - if(pThis->bRunsDA == 0) { - qqueueInitDA(pThis, QUEUE_MODE_ENQONLY, MUTEX_ALREADY_LOCKED); /* switch to DA mode */ - } else { - /* TODO: RACE: we may reach this point when the DA worker has been initialized (state 1) - * but is not yet running (state 2). In this case, pThis->pqDA is NULL! rgerhards, 2008-02-27 - */ - qqueueSetEnqOnly(pThis->pqDA, QUEUE_MODE_ENQONLY, MUTEX_ALREADY_LOCKED); /* switch to enqueue-only mode */ - } + if(wtpGetCurNumWrkr(pThis->pWtpReg, LOCK_MUTEX) > 0) { END_MTX_PROTECTED_OPERATIONS(pThis->mut); - /* make sure we do not timeout before we are done */ - dbgoprint((obj_t*) pThis, "bSaveOnShutdown configured, eternal timeout set\n"); - timeoutComp(&tTimeout, QUEUE_TIMEOUT_ETERNAL); - /* and run the primary queue's DA worker to drain the queue */ - iRetLocal = wtpShutdownAll(pThis->pWtpDA, wtpState_SHUTDOWN, &tTimeout); - if(iRetLocal != RS_RET_OK) { - dbgoprint((obj_t*) pThis, "unexpected iRet state %d after trying to shut down primary queue in disk save mode, " - "continuing, but results are unpredictable\n", iRetLocal); + dbgoprint((obj_t*) pThis, "trying immediate shutdown of regular workers\n"); + iRetLocal = wtpShutdownAll(pThis->pWtpReg, wtpState_SHUTDOWN_IMMEDIATE, &tTimeout); + if(iRetLocal == RS_RET_TIMED_OUT) { + dbgoprint((obj_t*) pThis, "immediate shutdown timed out on primary queue (this is acceptable and " + "triggers cancellation)\n"); + } else if(iRetLocal != RS_RET_OK) { + dbgoprint((obj_t*) pThis, "unexpected iRet state %d after trying immediate shutdown of the primary queue " + "in disk save mode. Continuing, but results are unpredictable\n", iRetLocal); } - } else { - END_MTX_PROTECTED_OPERATIONS(pThis->mut); + /* we need to re-aquire the mutex for the next check in this case! */ + BEGIN_MTX_PROTECTED_OPERATIONS(pThis->mut, LOCK_MUTEX); } - /* now the primary queue is either empty, persisted to disk - or set to loose messages. So we - * can now request immediate shutdown of any remaining workers. Note that if bSaveOnShutdown was set, - * the queue is now empty. If regular workers are still running, and try to pull the next message, - * they will automatically terminate as there no longer is any message left to process. - */ - BEGIN_MTX_PROTECTED_OPERATIONS(pThis->mut, LOCK_MUTEX); /* some workers may be running in parallel! 
*/ - if(qqueueGetOverallQueueSize(pThis) > 0) { - timeoutComp(&tTimeout, pThis->toActShutdown); - if(wtpGetCurNumWrkr(pThis->pWtpReg, LOCK_MUTEX) > 0) { - END_MTX_PROTECTED_OPERATIONS(pThis->mut); - dbgoprint((obj_t*) pThis, "trying immediate shutdown of regular workers\n"); - iRetLocal = wtpShutdownAll(pThis->pWtpReg, wtpState_SHUTDOWN_IMMEDIATE, &tTimeout); - if(iRetLocal == RS_RET_TIMED_OUT) { - dbgoprint((obj_t*) pThis, "immediate shutdown timed out on primary queue (this is acceptable and " - "triggers cancellation)\n"); - } else if(iRetLocal != RS_RET_OK) { - dbgoprint((obj_t*) pThis, "unexpected iRet state %d after trying immediate shutdown of the primary queue " - "in disk save mode. Continuing, but results are unpredictable\n", iRetLocal); - } - /* we need to re-aquire the mutex for the next check in this case! */ - BEGIN_MTX_PROTECTED_OPERATIONS(pThis->mut, LOCK_MUTEX); /* some workers may be running in parallel! */ - } - if(pThis->bIsDA && wtpGetCurNumWrkr(pThis->pWtpDA, LOCK_MUTEX) > 0) { - /* and now the same for the DA queue */ - END_MTX_PROTECTED_OPERATIONS(pThis->mut); - dbgoprint((obj_t*) pThis, "trying immediate shutdown of DA workers\n"); - iRetLocal = wtpShutdownAll(pThis->pWtpDA, wtpState_SHUTDOWN_IMMEDIATE, &tTimeout); - if(iRetLocal == RS_RET_TIMED_OUT) { - dbgoprint((obj_t*) pThis, "immediate shutdown timed out on DA queue (this is acceptable and " - "triggers cancellation)\n"); - } else if(iRetLocal != RS_RET_OK) { - dbgoprint((obj_t*) pThis, "unexpected iRet state %d after trying immediate shutdown of the DA queue " - "in disk save mode. Continuing, but results are unpredictable\n", iRetLocal); - } - } else { - END_MTX_PROTECTED_OPERATIONS(pThis->mut); - } - } else { + if(pThis->bRunsDA && wtpGetCurNumWrkr(pThis->pqDA->pWtpReg, LOCK_MUTEX) > 0) { + /* and now the same for the DA queue */ END_MTX_PROTECTED_OPERATIONS(pThis->mut); + dbgoprint((obj_t*) pThis, "trying immediate shutdown of DA queue workers\n"); + iRetLocal = wtpShutdownAll(pThis->pqDA->pWtpReg, wtpState_SHUTDOWN_IMMEDIATE, &tTimeout); + if(iRetLocal == RS_RET_TIMED_OUT) { + dbgoprint((obj_t*) pThis, "immediate shutdown timed out on DA queue (this is acceptable and " + "triggers cancellation)\n"); + } else if(iRetLocal != RS_RET_OK) { + dbgoprint((obj_t*) pThis, "unexpected iRet state %d after trying immediate shutdown of the DA queue " + "in disk save mode. Continuing, but results are unpredictable\n", iRetLocal); + } } + END_MTX_PROTECTED_OPERATIONS(pThis->mut); + + RETiRet; +} + + +/* This function cancels all remenaing regular workers for both the main and the DA + * queue. The main queue's DA worker pool continues to run (if it exists and is active). + * rgerhards, 2009-05-29 + */ +static rsRetVal +cancelWorkers(qqueue_t *pThis) +{ + rsRetVal iRetLocal; + DEFiRet; /* Now queue workers should have terminated. If not, we need to cancel them as we have applied * all timeout setting. If any worker in any queue still executes, its consumer is possibly - * long-running and cancelling is the only way to get rid of it. Note that the - * cancellation handler will probably re-queue a user pointer, so the queue's enqueue - * function is still needed (what is no problem as we do not yet destroy the queue - but I - * thought it's a good idea to mention that fact). -- rgerhards, 2008-01-25 + * long-running and cancelling is the only way to get rid of it. 
*/ dbgoprint((obj_t*) pThis, "checking to see if we need to cancel any worker threads of the primary queue\n"); iRetLocal = wtpCancelAll(pThis->pWtpReg); /* returns immediately if all threads already have terminated */ @@ -1247,12 +1336,6 @@ static rsRetVal qqueueShutdownWorkers(qqueue_t *pThis) "threads, continuing, but results are unpredictable\n", iRetLocal); } - - /* TODO: think: do we really need to do this here? Can't it happen on DA queue destruction? If we - * disable it, we get an assertion... I think this is OK, as we need to have a certain order and - * canceling the DA workers here ensures that order. But in any instant, we may have a look at this - * code after we have reaced the milestone. -- rgerhards, 2008-01-27 - */ /* ... and now the DA queue, if it exists (should always be after the primary one) */ if(pThis->pqDA != NULL) { dbgoprint((obj_t*) pThis, "checking to see if we need to cancel any worker threads of the DA queue\n"); @@ -1261,14 +1344,70 @@ static rsRetVal qqueueShutdownWorkers(qqueue_t *pThis) dbgoprint((obj_t*) pThis, "unexpected iRet state %d trying to cancel DA queue worker " "threads, continuing, but results are unpredictable\n", iRetLocal); } + + /* finally, we cancel the main queue's DA worker pool, if it still is running. It may be + * restarted later to persist the queue. But we stop it, because otherwise we get into + * big trouble when resetting the logical dequeue pointer. This operation can only be + * done when *no* worker is running. So time for a shutdown... -- rgerhards, 2009-05-28 + */ + dbgoprint((obj_t*) pThis, "checking to see if we need to cancel the main queue's DA worker pool\n"); + iRetLocal = wtpCancelAll(pThis->pWtpDA); /* returns immediately if all threads already have terminated */ } + RETiRet; +} + + +/* This function shuts down all worker threads and waits until they + * have terminated. If they timeout, they are cancelled. + * rgerhards, 2008-01-24 + * Please note that this function shuts down BOTH the parent AND the child queue + * in DA case. This is necessary because their timeouts are tightly coupled. Most + * importantly, the timeouts would be applied twice (or logic be extremely + * complex) if each would have its own shutdown. The function does not self check + * this condition - the caller must make sure it is not called with a parent. + * rgerhards, 2009-05-26: we do NO longer persist the queue here if bSaveOnShutdown + * is set. This must be handled by the caller. Not doing that cleans up the queue + * shutdown considerably. Also, older engines had a potential hang condition when + * the DA queue was already started and the DA worker configured for infinite + * retries and the action was during retry processing. This was a design issue, + * which is solved as of now. Note that the shutdown now may take a little bit + * longer, because we no longer can persist the queue in parallel to waiting + * on worker timeouts. + */ +static rsRetVal +ShutdownWorkers(qqueue_t *pThis) +{ + DEFiRet; + + ISOBJ_TYPE_assert(pThis, qqueue); + ASSERT(pThis->pqParent == NULL); /* detect invalid calling sequence */ + + dbgoprint((obj_t*) pThis, "initiating worker thread shutdown sequence\n"); + + /* we reduce the low water mark in any case. This is not absolutely necessary, but + * it is useful because we enable DA mode at several spots below and so we do not need + * to think about the low water mark each time. + */ + pThis->iHighWtrMrk = 1; /* if we do not do this, the DA queue will not stop! 
*/ + pThis->iLowWtrMrk = 0; + + CHKiRet(tryShutdownWorkersWithinQueueTimeout(pThis)); + + if(getPhysicalQueueSize(pThis) > 0) { + CHKiRet(tryShutdownWorkersWithinActionTimeout(pThis)); + } + + CHKiRet(cancelWorkers(pThis)); + /* ... finally ... all worker threads have terminated :-) * Well, more precisely, they *are in termination*. Some cancel cleanup handlers - * may still be running. + * may still be running. Note that the main queue's DA worker may still be running. */ - dbgoprint((obj_t*) pThis, "worker threads terminated, remaining queue size %d.\n", qqueueGetOverallQueueSize(pThis)); + dbgoprint((obj_t*) pThis, "worker threads terminated, remaining queue size log %d, phys %d.\n", + getLogicalQueueSize(pThis), getPhysicalQueueSize(pThis)); +finalize_it: RETiRet; } @@ -1280,7 +1419,7 @@ static rsRetVal qqueueShutdownWorkers(qqueue_t *pThis) * to modify some parameters before the queue is actually started. */ rsRetVal qqueueConstruct(qqueue_t **ppThis, queueType_t qType, int iWorkerThreads, - int iMaxQueueSize, rsRetVal (*pConsumer)(void*,void*)) + int iMaxQueueSize, rsRetVal (*pConsumer)(void*, batch_t*)) { DEFiRet; qqueue_t *pThis; @@ -1305,10 +1444,12 @@ rsRetVal qqueueConstruct(qqueue_t **ppThis, queueType_t qType, int iWorkerThread pThis->lenSpoolDir = strlen((char*)pThis->pszSpoolDir); pThis->iMaxFileSize = 1024 * 1024; /* default is 1 MiB */ pThis->iQueueSize = 0; + pThis->nLogDeq = 0; pThis->iMaxQueueSize = iMaxQueueSize; pThis->pConsumer = pConsumer; pThis->iNumWorkerThreads = iWorkerThreads; pThis->iDeqtWinToHr = 25; /* disable time-windowed dequeuing by default */ + pThis->iDeqBatchSize = 8; /* conservative default, should still provide good performance */ pThis->pszFilePrefix = NULL; pThis->qType = qType; @@ -1319,19 +1460,25 @@ rsRetVal qqueueConstruct(qqueue_t **ppThis, queueType_t qType, int iWorkerThread pThis->qConstruct = qConstructFixedArray; pThis->qDestruct = qDestructFixedArray; pThis->qAdd = qAddFixedArray; + pThis->qDeq = qDeqFixedArray; pThis->qDel = qDelFixedArray; + pThis->qUnDeqAll = qUnDeqAllFixedArray; break; case QUEUETYPE_LINKEDLIST: pThis->qConstruct = qConstructLinkedList; pThis->qDestruct = qDestructLinkedList; pThis->qAdd = qAddLinkedList; - pThis->qDel = (rsRetVal (*)(qqueue_t*,void**)) qDelLinkedList; + pThis->qDeq = (rsRetVal (*)(qqueue_t*,void**)) qDeqLinkedList; + pThis->qDel = (rsRetVal (*)(qqueue_t*)) qDelLinkedList; + pThis->qUnDeqAll = qUnDeqAllLinkedList; break; case QUEUETYPE_DISK: pThis->qConstruct = qConstructDisk; pThis->qDestruct = qDestructDisk; pThis->qAdd = qAddDisk; + pThis->qDeq = qDeqDisk; pThis->qDel = qDelDisk; + pThis->qUnDeqAll = qUnDeqAllDisk; /* special handling */ pThis->iNumWorkerThreads = 1; /* we need exactly one worker */ break; @@ -1340,6 +1487,7 @@ rsRetVal qqueueConstruct(qqueue_t **ppThis, queueType_t qType, int iWorkerThread pThis->qDestruct = qDestructDirect; pThis->qAdd = qAddDirect; pThis->qDel = qDelDirect; + pThis->qUnDeqAll = qUnDeqAllDirect; break; } @@ -1349,36 +1497,6 @@ finalize_it: } -/* cancellation cleanup handler for queueWorker () - * Updates admin structure and frees ressources. - * Params: - * arg1 - user pointer (in this case a qqueue_t) - * arg2 - user data pointer (in this case a queue data element, any object [queue's pUsr ptr!]) - * Note that arg2 may be NULL, in which case no dequeued but unprocessed pUsr exists! 
- * rgerhards, 2008-01-16 - */ -static rsRetVal -qqueueConsumerCancelCleanup(void *arg1, void *arg2) -{ - DEFiRet; - - qqueue_t *pThis = (qqueue_t*) arg1; - obj_t *pUsr = (obj_t*) arg2; - - ISOBJ_TYPE_assert(pThis, qqueue); - - if(pUsr != NULL) { - /* make sure the data element is not lost */ - dbgoprint((obj_t*) pThis, "cancelation cleanup handler consumer called, we need to unget one user data element\n"); - CHKiRet(qqueueUngetObj(pThis, pUsr, LOCK_MUTEX)); - } - -finalize_it: - RETiRet; -} - - - /* This function checks if the provided message shall be discarded and does so, if needed. * In DA mode, we do not discard any messages as we assume the disk subsystem is fast enough to * provide real-time creation of spool files. @@ -1419,38 +1537,171 @@ finalize_it: } -/* dequeue the queued object for the queue consumers. - * rgerhards, 2008-10-21 +/* Finally remove n elements from the queue store. */ -static rsRetVal -qqueueDequeueConsumable(qqueue_t *pThis, wti_t *pWti, int iCancelStateSave) +static inline rsRetVal +DoDeleteBatchFromQStore(qqueue_t *pThis, int nElem) +{ + int i; + DEFiRet; + + ISOBJ_TYPE_assert(pThis, qqueue); + + /* now send delete request to storage driver */ + for(i = 0 ; i < nElem ; ++i) { + pThis->qDel(pThis); + } + + /* iQueueSize is not decremented by qDel(), so we need to do it ourselves */ + ATOMIC_SUB(pThis->iQueueSize, nElem); + ATOMIC_SUB(pThis->nLogDeq, nElem); +dbgprintf("delete batch from store, new sizes: log %d, phys %d\n", getLogicalQueueSize(pThis), getPhysicalQueueSize(pThis)); + ++pThis->deqIDDel; /* one more batch dequeued */ + + RETiRet; +} + + +/* remove messages from the physical queue store that are fully processed. This is + * controlled via the to-delete list. We can only delete those elements, that are + * at the current physical tail of the queue. If the batch is from another position, + * we schedule it for deletion, but actual deletion will happen at a later call + * of this function here. We always delete as much as possible, which includes + * picking up things from the to-delete list. + */ +static inline rsRetVal +DeleteBatchFromQStore(qqueue_t *pThis, batch_t *pBatch) { + toDeleteLst_t *pTdl; + qDeqID deqIDDel; DEFiRet; + + ISOBJ_TYPE_assert(pThis, qqueue); + assert(pBatch != NULL); + + pTdl = tdlPeek(pThis); /* get current head element */ + if(pTdl == NULL) { /* to-delete list empty */ + DoDeleteBatchFromQStore(pThis, pBatch->nElemDeq); + } else if(pBatch->deqID == pThis->deqIDDel) { + deqIDDel = pThis->deqIDDel; + pTdl = tdlPeek(pThis); + while(pTdl != NULL && deqIDDel == pTdl->deqID) { + DoDeleteBatchFromQStore(pThis, pTdl->nElemDeq); + tdlPop(pThis); + ++deqIDDel; + pTdl = tdlPeek(pThis); + } + } else { + /* can not delete, insert into to-delete list */ + dbgprintf("not at head of to-delete list, enqueue %d\n", (int) pBatch->deqID); + CHKiRet(tdlAdd(pThis, pBatch->deqID, pBatch->nElemDeq)); + } + +finalize_it: + RETiRet; +} + + +/* Delete a batch of processed user objects from the queue, which includes + * destructing the objects themself. 
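
The rule enforced by DeleteBatchFromQStore() above is that physical deletes must happen strictly in dequeue order: a batch whose deqID is not yet at the head of the delete sequence is parked on the queue's to-delete list and picked up by a later call. A minimal sketch of how such a list could be kept ordered follows; the real helpers are tdlAdd()/tdlPeek()/tdlPop() from this commit, and the insertion-sorted body shown here is only an assumption for illustration (it relies on the toDeleteLst_t definition added to queue.h in this patch).

#include <stdlib.h>

/* keep the to-delete list sorted by dequeue ID, so the peek helper always
 * returns the batch that is next in physical queue order (illustrative only)
 */
static rsRetVal
tdlAddSketch(qqueue_t *pThis, qDeqID deqID, int nElemDeq)
{
        toDeleteLst_t *pNew;
        toDeleteLst_t **ppCurr;
        DEFiRet;

        CHKmalloc(pNew = malloc(sizeof(toDeleteLst_t)));
        pNew->deqID = deqID;
        pNew->nElemDeq = nElemDeq;

        /* walk to the first entry with a larger deqID and link in before it */
        ppCurr = &pThis->toDeleteLst;
        while(*ppCurr != NULL && (*ppCurr)->deqID < deqID)
                ppCurr = &(*ppCurr)->pNext;
        pNew->pNext = *ppCurr;
        *ppCurr = pNew;

finalize_it:
        RETiRet;
}
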
+ * rgerhards, 2009-05-13 + */ +static inline rsRetVal +DeleteProcessedBatch(qqueue_t *pThis, batch_t *pBatch) +{ + int i; void *pUsr; + DEFiRet; + + ISOBJ_TYPE_assert(pThis, qqueue); + assert(pBatch != NULL); + + for(i = 0 ; i < pBatch->nElem ; ++i) { + pUsr = pBatch->pElem[i].pUsrp; + objDestruct(pUsr); + } + + iRet = DeleteBatchFromQStore(pThis, pBatch); + + pBatch->nElem = pBatch->nElemDeq = 0; /* reset batch */ + + RETiRet; +} + + +/* dequeue as many user pointers as are available, until we hit the configured + * upper limit of pointers. + * This must only be called when the queue mutex is LOOKED, otherwise serious + * malfunction will happen. + */ +static inline rsRetVal +DequeueConsumableElements(qqueue_t *pThis, wti_t *pWti, int *piRemainingQueueSize) +{ + int nDequeued; + int nDiscarded; + int nDeleted; int iQueueSize; - int bRunsDA; /* cache for early mutex release */ - - /* dequeue element (still protected from mutex) */ - iRet = qqueueDel(pThis, &pUsr); - qqueueChkPersist(pThis); - iQueueSize = qqueueGetOverallQueueSize(pThis); /* cache this for after mutex release */ - bRunsDA = pThis->bRunsDA; /* cache this for after mutex release */ - - /* We now need to save the user pointer for the cancel cleanup handler, BUT ONLY - * if we could successfully obtain a user pointer. Otherwise, we would bring the - * cancel cleanup handler into big troubles (and we did ;)). Note that we can - * NOT set the variable further below, as this may lead to an object leak. We - * may get cancelled before we reach that part of the code, so the only - * solution is to do it here. -- rgerhards, 2008-02-27 - */ - if(iRet == RS_RET_OK) { - pWti->pUsrp = pUsr; + void *pUsr; + rsRetVal localRet; + DEFiRet; + + nDeleted = pWti->batch.nElemDeq; + DeleteProcessedBatch(pThis, &pWti->batch); + + nDequeued = nDiscarded = 0; + while((iQueueSize = getLogicalQueueSize(pThis)) > 0 && nDequeued < pThis->iDeqBatchSize) { +dbgprintf("DequeueConsumableElements, index %d\n", nDequeued); + CHKiRet(qqueueDeq(pThis, &pUsr)); + + /* check if we should discard this element */ + localRet = qqueueChkDiscardMsg(pThis, pThis->iQueueSize, pThis->bRunsDA, pUsr); + if(localRet == RS_RET_QUEUE_FULL) { + ++nDiscarded; + continue; + } else if(localRet != RS_RET_OK) { + ABORT_FINALIZE(localRet); + } + + /* all well, use this element */ + pWti->batch.pElem[nDequeued].pUsrp = pUsr; + pWti->batch.pElem[nDequeued].state = BATCH_STATE_RDY; + ++nDequeued; } + /* it is sufficient to persist only when the bulk of work is done */ + qqueueChkPersist(pThis, nDequeued+nDiscarded+nDeleted); + + pWti->batch.nElem = nDequeued; + pWti->batch.nElemDeq = nDequeued + nDiscarded; + pWti->batch.deqID = getNextDeqID(pThis); + *piRemainingQueueSize = iQueueSize; + +finalize_it: + RETiRet; +} + + +/* dequeue the queued object for the queue consumers. + * rgerhards, 2008-10-21 + * I made a radical change - we now dequeue multiple elements, and store these objects in + * an array of user pointers. We expect that this increases performance. + * rgerhards, 2009-04-22 + */ +static rsRetVal +DequeueConsumable(qqueue_t *pThis, wti_t *pWti, int iCancelStateSave) +{ + DEFiRet; + int iQueueSize = 0; /* keep the compiler happy... 
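
Throughout this patch two queue sizes are distinguished: the physical size counts everything still present in the queue store, while the logical size additionally excludes elements that have been dequeued into a batch (tracked in nLogDeq) but not yet deleted. A compact sketch of that relationship, assuming the semantics implied by the nLogDeq bookkeeping above; the names carry a Sketch suffix because the actual getLogicalQueueSize()/getPhysicalQueueSize() implementations are not part of this excerpt.

/* physical size: all elements still held by the queue store */
static inline int
physicalQueueSizeSketch(qqueue_t *pThis)
{
        return pThis->iQueueSize;
}

/* logical size: physical size minus elements already dequeued into batches */
static inline int
logicalQueueSizeSketch(qqueue_t *pThis)
{
        return pThis->iQueueSize - pThis->nLogDeq;
}
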
*/ + + /* dequeue element batch (still protected from mutex) */ + iRet = DequeueConsumableElements(pThis, pWti, &iQueueSize); + /* awake some flow-controlled sources if we can do this right now */ /* TODO: this could be done better from a performance point of view -- do it only if * we have someone waiting for the condition (or only when we hit the watermark right * on the nail [exact value]) -- rgerhards, 2008-03-14 + * now that we dequeue batches of pointers, this is much less an issue... + * rgerhards, 2009-04-22 */ if(iQueueSize < pThis->iFullDlyMrk / 2) { pthread_cond_broadcast(&pThis->belowFullDlyWtrMrk); @@ -1460,37 +1711,17 @@ qqueueDequeueConsumable(qqueue_t *pThis, wti_t *pWti, int iCancelStateSave) pthread_cond_broadcast(&pThis->belowLightDlyWtrMrk); } - /* rgerhards, 2008-09-30: I reversed the order of cond_signal und mutex_unlock - * as of the pthreads recommendation on predictable scheduling behaviour. I don't see - * any problems caused by this, but I add this comment in case some will be seen - * in the next time. - */ + // TODO: MULTI: check physical queue size? pthread_cond_signal(&pThis->notFull); d_pthread_mutex_unlock(pThis->mut); pthread_setcancelstate(iCancelStateSave, NULL); /* WE ARE NO LONGER PROTECTED BY THE MUTEX */ - /* do actual processing (the lengthy part, runs in parallel) - * If we had a problem while dequeing, we do not call the consumer, - * but we otherwise ignore it. This is in the hopes that it will be - * self-healing. However, this is really not a good thing. - * rgerhards, 2008-01-03 - */ - if(iRet != RS_RET_OK) - FINALIZE; - - /* we are running in normal, non-disk-assisted mode do a quick check if we need to drain the queue. - * In DA mode, we do not discard any messages as we assume the disk subsystem is fast enough to - * provide real-time creation of spool files. - * Note: It is OK to use the cached iQueueSize here, because it does not hurt if it is slightly wrong. - */ - CHKiRet(qqueueChkDiscardMsg(pThis, iQueueSize, bRunsDA, pUsr)); - -finalize_it: if(iRet != RS_RET_OK && iRet != RS_RET_DISCARDMSG) { dbgoprint((obj_t*) pThis, "error %d dequeueing element - ignoring, but strange things " "may happen\n", iRet); } + RETiRet; } @@ -1533,7 +1764,7 @@ finalize_it: * but you get the idea from the code above. */ static rsRetVal -qqueueRateLimiter(qqueue_t *pThis) +RateLimiter(qqueue_t *pThis) { DEFiRet; int iDelay; @@ -1590,25 +1821,68 @@ qqueueRateLimiter(qqueue_t *pThis) } +/* This dequeues the next batch and checks if the queue is empty. If it is + * empty, return RS_RET_IDLE. That will trigger termination of the function + * and tell the upper layer caller to initiate idle processing. + * rgerhards, 2009-05-20 + */ +static inline rsRetVal +DequeueForConsumer(qqueue_t *pThis, wti_t *pWti, int iCancelStateSave) +{ + DEFiRet; + + ISOBJ_TYPE_assert(pThis, qqueue); + ISOBJ_TYPE_assert(pWti, wti); + + CHKiRet(DequeueConsumable(pThis, pWti, iCancelStateSave)); + + if(pWti->batch.nElem == 0) + ABORT_FINALIZE(RS_RET_IDLE); + +finalize_it: + RETiRet; +} + + +/* This is called when a batch is processed and the worker does not + * ask for another batch (e.g. 
because it is to be terminated) + * rgerhards, 2009-05-27 + */ +static rsRetVal +batchProcessed(qqueue_t *pThis, wti_t *pWti) +{ + DEFiRet; + + ISOBJ_TYPE_assert(pThis, qqueue); + ISOBJ_TYPE_assert(pWti, wti); +dbgprintf("XXX: batchProcessed deletes %d records\n", pWti->batch.nElemDeq); + + DeleteProcessedBatch(pThis, &pWti->batch); + qqueueChkPersist(pThis, pWti->batch.nElemDeq); + + RETiRet; +} + /* This is the queue consumer in the regular (non-DA) case. It is * protected by the queue mutex, but MUST release it as soon as possible. * rgerhards, 2008-01-21 */ static rsRetVal -qqueueConsumerReg(qqueue_t *pThis, wti_t *pWti, int iCancelStateSave) +ConsumerReg(qqueue_t *pThis, wti_t *pWti, int iCancelStateSave) { DEFiRet; ISOBJ_TYPE_assert(pThis, qqueue); ISOBJ_TYPE_assert(pWti, wti); - CHKiRet(qqueueDequeueConsumable(pThis, pWti, iCancelStateSave)); - CHKiRet(pThis->pConsumer(pThis->pUsr, pWti->pUsrp)); + CHKiRet(DequeueForConsumer(pThis, pWti, iCancelStateSave)); + CHKiRet(pThis->pConsumer(pThis->pUsr, &pWti->batch)); /* we now need to check if we should deliberately delay processing a bit * and, if so, do that. -- rgerhards, 2008-01-30 */ +//TODO: MULTIQUEUE: the following setting is no longer correct - need to think about how to do that... if(pThis->iDeqSlowdown) { dbgoprint((obj_t*) pThis, "sleeping %d microseconds as requested by config params\n", pThis->iDeqSlowdown); @@ -1616,11 +1890,12 @@ qqueueConsumerReg(qqueue_t *pThis, wti_t *pWti, int iCancelStateSave) } finalize_it: +dbgprintf("XXX: regular consumer finished, iret=%d, szlog %d sz phys %d\n", iRet, getLogicalQueueSize(pThis), getPhysicalQueueSize(pThis)); RETiRet; } -/* This is a special consumer to feed the disk-queue in disk-assited mode. +/* This is a special consumer to feed the disk-queue in disk-assisted mode. * When active, our own queue more or less acts as a memory buffer to the disk. * So this consumer just needs to drain the memory queue and submit entries * to the disk queue. The disk queue will then call the actual consumer from @@ -1630,15 +1905,23 @@ finalize_it: * rgerhards, 2008-01-14 */ static rsRetVal -qqueueConsumerDA(qqueue_t *pThis, wti_t *pWti, int iCancelStateSave) +ConsumerDA(qqueue_t *pThis, wti_t *pWti, int iCancelStateSave) { + int i; DEFiRet; ISOBJ_TYPE_assert(pThis, qqueue); ISOBJ_TYPE_assert(pWti, wti); - CHKiRet(qqueueDequeueConsumable(pThis, pWti, iCancelStateSave)); - CHKiRet(qqueueEnqObj(pThis->pqDA, eFLOWCTL_NO_DELAY, pWti->pUsrp)); + CHKiRet(DequeueForConsumer(pThis, pWti, iCancelStateSave)); + /* iterate over returned results and enqueue them in DA queue */ + for(i = 0 ; i < pWti->batch.nElem ; i++) { + /* TODO: we must add a generic "addRef" mechanism, because the disk queue enqueue destructs + * the message. So far, we simply assume we always have msg_t, what currently is always the case. + * rgerhards, 2009-05-28 + */ + CHKiRet(qqueueEnqObj(pThis->pqDA, eFLOWCTL_NO_DELAY, (obj_t*)MsgAddRef((msg_t*)(pWti->batch.pElem[i].pUsrp)))); + } finalize_it: dbgoprint((obj_t*) pThis, "DAConsumer returns with iRet %d\n", iRet); @@ -1651,20 +1934,17 @@ finalize_it: * If we are a child, we have done our duty when the queue is empty. In that case, * we can terminate. * Version for the DA worker thread. NOTE: the pThis->bRunsDA is different from - * the DA queue + * the DA queue. + * If our queue is in destruction, we drain to the DA queue and so we shall not terminate + * until we have done so. 
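
ConsumerReg() above now hands the consumer a whole batch_t instead of a single user pointer, and the consumer is expected to record a per-element outcome in the state field, using the constants from the new batch.h. A minimal sketch of such a consumer and of queue construction with the new callback signature follows; sampleBatchConsumer, processOneMsg, buildQueue and the tuning values are purely illustrative, and the batch-size setter name is the one generated by DEFpropSetMeth(qqueue, iDeqBatchSize, int).

static rsRetVal processOneMsg(void *pUsr, void *pMsg); /* placeholder for the per-message work */

static rsRetVal
sampleBatchConsumer(void *pUsr, batch_t *pBatch)
{
        int i;
        DEFiRet;

        for(i = 0 ; i < pBatch->nElem ; ++i) {
                if(processOneMsg(pUsr, pBatch->pElem[i].pUsrp) == RS_RET_OK)
                        pBatch->pElem[i].state = BATCH_STATE_COMM; /* committed */
                else
                        pBatch->pElem[i].state = BATCH_STATE_BAD;  /* do not resubmit */
        }

        RETiRet;
}

/* a queue built on top of it, tuned for larger dequeue batches */
static rsRetVal
buildQueue(qqueue_t **ppQueue)
{
        DEFiRet;

        CHKiRet(qqueueConstruct(ppQueue, QUEUETYPE_LINKEDLIST, 1, 10000, sampleBatchConsumer));
        CHKiRet(qqueueSetiDeqBatchSize(*ppQueue, 32)); /* raise the conservative default of 8 */
        CHKiRet(qqueueStart(*ppQueue));

finalize_it:
        RETiRet;
}
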
*/ -static int +static rsRetVal qqueueChkStopWrkrDA(qqueue_t *pThis) { - /* if our queue is in destruction, we drain to the DA queue and so we shall not terminate - * until we have done so. - */ - int bStopWrkr; - - BEGINfunc + DEFiRet; if(pThis->bEnqOnly) { - bStopWrkr = 1; + iRet = RS_RET_TERMINATE_WHEN_IDLE; } else { if(pThis->bRunsDA) { ASSERT(pThis->pqDA != NULL); @@ -1672,19 +1952,21 @@ qqueueChkStopWrkrDA(qqueue_t *pThis) && pThis->pqDA->sizeOnDiskMax > 0 && pThis->pqDA->tVars.disk.sizeOnDisk > pThis->pqDA->sizeOnDiskMax) { /* this queue can never grow, so we can give up... */ - bStopWrkr = 1; - } else if(qqueueGetOverallQueueSize(pThis) < pThis->iHighWtrMrk && pThis->bQueueStarted == 1) { - bStopWrkr = 1; - } else { - bStopWrkr = 0; + iRet = RS_RET_TERMINATE_NOW; + } else if(getPhysicalQueueSize(pThis) < pThis->iHighWtrMrk && pThis->bQueueStarted == 1) { +dbgprintf("XXX: terminate_NOW DA worker: queue size %d, high water mark %d\n", getPhysicalQueueSize(pThis), pThis->iHighWtrMrk); + iRet = RS_RET_TERMINATE_NOW; +RUNLOG_STR("XXX: re-start reg worker"); +qqueueAdviseMaxWorkers(pThis); +RUNLOG_STR("XXX: done re-start reg worker"); } } else { - bStopWrkr = 1; + // experimental iRet = RS_RET_TERMINATE_NOW; + ; } } - ENDfunc - return bStopWrkr; + RETiRet; } @@ -1695,38 +1977,51 @@ qqueueChkStopWrkrDA(qqueue_t *pThis) * Version for the regular worker thread. NOTE: the pThis->bRunsDA is different from * the DA queue */ -static int -qqueueChkStopWrkrReg(qqueue_t *pThis) +static rsRetVal +ChkStopWrkrReg(qqueue_t *pThis) { - return pThis->bEnqOnly || pThis->bRunsDA || (pThis->pqParent != NULL && qqueueGetOverallQueueSize(pThis) == 0); + DEFiRet; + if(pThis->bEnqOnly) { +dbgprintf("XXX: terminate_NOW queue:Reg worker: enqOnly! queue size %d\n", getPhysicalQueueSize(pThis)); + iRet = RS_RET_TERMINATE_NOW; + } else if(pThis->pqParent != NULL) { + iRet = RS_RET_TERMINATE_WHEN_IDLE; + } + + RETiRet; +} + + +/* return the configured "deq max at once" interval + * rgerhards, 2009-04-22 + */ +static rsRetVal +GetDeqBatchSize(qqueue_t *pThis, int *pVal) +{ + DEFiRet; + assert(pVal != NULL); + *pVal = pThis->iDeqBatchSize; +if(pThis->pqParent != NULL) + *pVal = 16; + RETiRet; } /* must only be called when the queue mutex is locked, else results - * are not stable! DA queue version + * are not stable! DA worker version (pThis *is* the *main* queue, not DA!) */ static int qqueueIsIdleDA(qqueue_t *pThis) { - /* remember: iQueueSize is the DA queue size, not the main queue! */ - /* TODO: I think we need just a single function for DA and non-DA mode - but I leave it for now as is */ - return(qqueueGetOverallQueueSize(pThis) == 0 || (pThis->bRunsDA && qqueueGetOverallQueueSize(pThis) <= pThis->iLowWtrMrk)); + return(getPhysicalQueueSize(pThis) <= pThis->iLowWtrMrk); } /* must only be called when the queue mutex is locked, else results - * are not stable! Regular queue version + * are not stable! Regular worker version. */ static int -qqueueIsIdleReg(qqueue_t *pThis) -{ -#if 0 /* enable for performance testing */ - int ret; - ret = qqueueGetOverallQueueSize(pThis) == 0 || (pThis->bRunsDA && qqueueGetOverallQueueSize(pThis) <= pThis->iLowWtrMrk); - if(ret) fprintf(stderr, "queue is idle\n"); - return ret; -#else - /* regular code! 
*/ - return(qqueueGetOverallQueueSize(pThis) == 0 || (pThis->bRunsDA && qqueueGetOverallQueueSize(pThis) <= pThis->iLowWtrMrk)); -#endif +IsIdleReg(qqueue_t *pThis) +{ + return(getPhysicalQueueSize(pThis) == 0); } @@ -1744,7 +2039,7 @@ qqueueIsIdleReg(qqueue_t *pThis) * I am telling this, because I, too, always get confused by those... */ static rsRetVal -qqueueRegOnWrkrShutdown(qqueue_t *pThis) +RegOnWrkrShutdown(qqueue_t *pThis) { DEFiRet; @@ -1765,7 +2060,7 @@ qqueueRegOnWrkrShutdown(qqueue_t *pThis) * hook to indicate in the parent queue (if we are a child) that we are not done yet. */ static rsRetVal -qqueueRegOnWrkrStartup(qqueue_t *pThis) +RegOnWrkrStartup(qqueue_t *pThis) { DEFiRet; @@ -1782,7 +2077,8 @@ qqueueRegOnWrkrStartup(qqueue_t *pThis) /* start up the queue - it must have been constructed and parameters defined * before. */ -rsRetVal qqueueStart(qqueue_t *pThis) /* this is the ConstructionFinalizer */ +rsRetVal +qqueueStart(qqueue_t *pThis) /* this is the ConstructionFinalizer */ { DEFiRet; rsRetVal iRetLocal; @@ -1818,11 +2114,12 @@ rsRetVal qqueueStart(qqueue_t *pThis) /* this is the ConstructionFinalizer */ /* call type-specific constructor */ CHKiRet(pThis->qConstruct(pThis)); /* this also sets bIsDA */ - dbgoprint((obj_t*) pThis, "type %d, enq-only %d, disk assisted %d, maxFileSz %lld, qsize %d, child %d, " - "full delay %d, light delay %d starting\n", + dbgoprint((obj_t*) pThis, "type %d, enq-only %d, disk assisted %d, maxFileSz %lld, lqsize %d, pqsize %d, child %d, " + "full delay %d, light delay %d, deq batch size %d starting\n", pThis->qType, pThis->bEnqOnly, pThis->bIsDA, pThis->iMaxFileSize, - qqueueGetOverallQueueSize(pThis), pThis->pqParent == NULL ? 0 : 1, - pThis->iFullDlyMrk, pThis->iLightDlyMrk); + getLogicalQueueSize(pThis), getPhysicalQueueSize(pThis), + pThis->pqParent == NULL ? 0 : 1, pThis->iFullDlyMrk, pThis->iLightDlyMrk, + pThis->iDeqBatchSize); if(pThis->qType == QUEUETYPE_DIRECT) FINALIZE; /* with direct queues, we are already finished... 
*/ @@ -1833,13 +2130,14 @@ rsRetVal qqueueStart(qqueue_t *pThis) /* this is the ConstructionFinalizer */ lenBuf = snprintf((char*)pszBuf, sizeof(pszBuf), "%s:Reg", obj.GetName((obj_t*) pThis)); CHKiRet(wtpConstruct (&pThis->pWtpReg)); CHKiRet(wtpSetDbgHdr (pThis->pWtpReg, pszBuf, lenBuf)); - CHKiRet(wtpSetpfRateLimiter (pThis->pWtpReg, (rsRetVal (*)(void *pUsr)) qqueueRateLimiter)); - CHKiRet(wtpSetpfChkStopWrkr (pThis->pWtpReg, (rsRetVal (*)(void *pUsr, int)) qqueueChkStopWrkrReg)); - CHKiRet(wtpSetpfIsIdle (pThis->pWtpReg, (rsRetVal (*)(void *pUsr, int)) qqueueIsIdleReg)); - CHKiRet(wtpSetpfDoWork (pThis->pWtpReg, (rsRetVal (*)(void *pUsr, void *pWti, int)) qqueueConsumerReg)); - CHKiRet(wtpSetpfOnWorkerCancel (pThis->pWtpReg, (rsRetVal (*)(void *pUsr, void*pWti))qqueueConsumerCancelCleanup)); - CHKiRet(wtpSetpfOnWorkerStartup (pThis->pWtpReg, (rsRetVal (*)(void *pUsr)) qqueueRegOnWrkrStartup)); - CHKiRet(wtpSetpfOnWorkerShutdown(pThis->pWtpReg, (rsRetVal (*)(void *pUsr)) qqueueRegOnWrkrShutdown)); + CHKiRet(wtpSetpfRateLimiter (pThis->pWtpReg, (rsRetVal (*)(void *pUsr)) RateLimiter)); + CHKiRet(wtpSetpfChkStopWrkr (pThis->pWtpReg, (rsRetVal (*)(void *pUsr, int)) ChkStopWrkrReg)); + CHKiRet(wtpSetpfGetDeqBatchSize (pThis->pWtpReg, (rsRetVal (*)(void *pUsr, int*)) GetDeqBatchSize)); + CHKiRet(wtpSetpfIsIdle (pThis->pWtpReg, (rsRetVal (*)(void *pUsr, wtp_t*)) IsIdleReg)); + CHKiRet(wtpSetpfDoWork (pThis->pWtpReg, (rsRetVal (*)(void *pUsr, void *pWti, int)) ConsumerReg)); + CHKiRet(wtpSetpfObjProcessed (pThis->pWtpReg, (rsRetVal (*)(void *pUsr, wti_t *pWti)) batchProcessed)); + CHKiRet(wtpSetpfOnWorkerStartup (pThis->pWtpReg, (rsRetVal (*)(void *pUsr)) RegOnWrkrStartup)); + CHKiRet(wtpSetpfOnWorkerShutdown(pThis->pWtpReg, (rsRetVal (*)(void *pUsr)) RegOnWrkrShutdown)); CHKiRet(wtpSetpmutUsr (pThis->pWtpReg, pThis->mut)); CHKiRet(wtpSetpcondBusy (pThis->pWtpReg, &pThis->notEmpty)); CHKiRet(wtpSetiNumWorkerThreads (pThis->pWtpReg, pThis->iNumWorkerThreads)); @@ -1855,7 +2153,7 @@ rsRetVal qqueueStart(qqueue_t *pThis) /* this is the ConstructionFinalizer */ iRetLocal = qqueueHaveQIF(pThis); if(iRetLocal == RS_RET_OK) { dbgoprint((obj_t*) pThis, "on-disk queue present, needs to be reloaded\n"); - qqueueInitDA(pThis, QUEUE_MODE_ENQDEQ, LOCK_MUTEX); /* initiate DA mode */ + InitDA(pThis, QUEUE_MODE_ENQDEQ, LOCK_MUTEX); /* initiate DA mode */ bInitialized = 1; /* we are done */ } else { /* TODO: use logerror? -- rgerhards, 2008-01-16 */ @@ -1864,7 +2162,7 @@ rsRetVal qqueueStart(qqueue_t *pThis) /* this is the ConstructionFinalizer */ } } - if(!bInitialized) { + if(Debug && !bInitialized) { dbgoprint((obj_t*) pThis, "queue starts up without (loading) any DA disk state (this is normal for the DA " "queue itself!)\n"); } @@ -1893,12 +2191,11 @@ static rsRetVal qqueuePersist(qqueue_t *pThis, int bIsCheckpoint) strm_t *psQIF = NULL; /* Queue Info File */ uchar pszQIFNam[MAXFNAME]; size_t lenQIFNam; - obj_t *pUsr; ASSERT(pThis != NULL); if(pThis->qType != QUEUETYPE_DISK) { - if(qqueueGetOverallQueueSize(pThis) > 0) { + if(getPhysicalQueueSize(pThis) > 0) { /* This error code is OK, but we will probably not implement this any time * The reason is that persistence happens via DA queues. But I would like to * leave the code as is, as we so have a hook in case we need one. @@ -1909,19 +2206,19 @@ static rsRetVal qqueuePersist(qqueue_t *pThis, int bIsCheckpoint) FINALIZE; /* if the queue is empty, we are happy and done... 
*/ } - dbgoprint((obj_t*) pThis, "persisting queue to disk, %d entries...\n", qqueueGetOverallQueueSize(pThis)); + dbgoprint((obj_t*) pThis, "persisting queue to disk, %d entries...\n", getPhysicalQueueSize(pThis)); /* Construct file name */ lenQIFNam = snprintf((char*)pszQIFNam, sizeof(pszQIFNam) / sizeof(uchar), "%s/%s.qi", (char*) glbl.GetWorkDir(), (char*)pThis->pszFilePrefix); - if((bIsCheckpoint != QUEUE_CHECKPOINT) && (qqueueGetOverallQueueSize(pThis) == 0)) { + if((bIsCheckpoint != QUEUE_CHECKPOINT) && (getPhysicalQueueSize(pThis) == 0)) { if(pThis->bNeedDelQIF) { unlink((char*)pszQIFNam); pThis->bNeedDelQIF = 0; } /* indicate spool file needs to be deleted */ - CHKiRet(strm.SetbDeleteOnClose(pThis->tVars.disk.pRead, 1)); + CHKiRet(strm.SetbDeleteOnClose(pThis->tVars.disk.pReadDel, 1)); FINALIZE; /* nothing left to do, so be happy */ } @@ -1940,29 +2237,19 @@ static rsRetVal qqueuePersist(qqueue_t *pThis, int bIsCheckpoint) */ CHKiRet(obj.BeginSerializePropBag(psQIF, (obj_t*) pThis)); objSerializeSCALAR(psQIF, iQueueSize, INT); - objSerializeSCALAR(psQIF, iUngottenObjs, INT); objSerializeSCALAR(psQIF, tVars.disk.sizeOnDisk, INT64); objSerializeSCALAR(psQIF, tVars.disk.bytesRead, INT64); CHKiRet(obj.EndSerialize(psQIF)); - /* now we must persist all objects on the ungotten queue - they can not go to - * to the regular files. -- rgerhards, 2008-01-29 - */ - while(pThis->iUngottenObjs > 0) { - CHKiRet(qqueueGetUngottenObj(pThis, &pUsr)); - CHKiRet((objSerialize(pUsr))(pUsr, psQIF)); - objDestruct(pUsr); - } - /* now persist the stream info */ CHKiRet(strm.Serialize(pThis->tVars.disk.pWrite, psQIF)); - CHKiRet(strm.Serialize(pThis->tVars.disk.pRead, psQIF)); + CHKiRet(strm.Serialize(pThis->tVars.disk.pReadDel, psQIF)); /* tell the input file object that it must not delete the file on close if the queue * is non-empty - but only if we are not during a simple checkpoint */ if(bIsCheckpoint != QUEUE_CHECKPOINT) { - CHKiRet(strm.SetbDeleteOnClose(pThis->tVars.disk.pRead, 0)); + CHKiRet(strm.SetbDeleteOnClose(pThis->tVars.disk.pReadDel, 0)); } /* we have persisted the queue object. So whenever it comes to an empty queue, @@ -1979,20 +2266,67 @@ finalize_it: /* check if we need to persist the current queue info. If an - * error occurs, thus should be ignored by caller (but we still + * error occurs, this should be ignored by caller (but we still * abide to our regular call interface)... * rgerhards, 2008-01-13 + * nUpdates is the number of updates since the last call to this function. + * It may be > 1 due to batches. -- rgerhards, 2009-05-12 */ -static rsRetVal qqueueChkPersist(qqueue_t *pThis) +static rsRetVal qqueueChkPersist(qqueue_t *pThis, int nUpdates) { + DEFiRet; ISOBJ_TYPE_assert(pThis, qqueue); + assert(nUpdates >= 0); - if(pThis->iPersistUpdCnt && ++pThis->iUpdsSincePersist >= pThis->iPersistUpdCnt) { + if(nUpdates == 0) + FINALIZE; + + pThis->iUpdsSincePersist += nUpdates; + if(pThis->iPersistUpdCnt && pThis->iUpdsSincePersist >= pThis->iPersistUpdCnt) { qqueuePersist(pThis, QUEUE_CHECKPOINT); pThis->iUpdsSincePersist = 0; } - return RS_RET_OK; +finalize_it: + RETiRet; +} + + +/* persist a queue with all data elements to disk - this is used to handle + * bSaveOnShutdown. We utilize the DA worker to do this. This must only + * be called after all workers have been shut down and if bSaveOnShutdown + * is actually set. Note that this function may potentially run long, + * depending on the queue configuration (e.g. store on remote machine). 
+ * rgerhards, 2009-05-26 + */ +static inline rsRetVal +DoSaveOnShutdown(qqueue_t *pThis) +{ + struct timespec tTimeout; + rsRetVal iRetLocal; + DEFiRet; + + ISOBJ_TYPE_assert(pThis, qqueue); + +dbgprintf("after InitDA, queue log %d, phys %d\n", getLogicalQueueSize(pThis), getPhysicalQueueSize(pThis)); + if(pThis->bRunsDA != 2) { + InitDA(pThis, QUEUE_MODE_ENQONLY, LOCK_MUTEX); /* switch to DA mode */ +dbgprintf("after InitDA, queue log %d, phys %d\n", getLogicalQueueSize(pThis), getPhysicalQueueSize(pThis)); + qqueueWaitDAModeInitialized(pThis); /* make sure DA mode is actually started, else we may have a race! */ + } + /* make sure we do not timeout before we are done */ + dbgoprint((obj_t*) pThis, "bSaveOnShutdown configured, infinite timeout set\n"); + timeoutComp(&tTimeout, QUEUE_TIMEOUT_ETERNAL); + /* and run the primary queue's DA worker to drain the queue */ + iRetLocal = wtpShutdownAll(pThis->pWtpDA, wtpState_SHUTDOWN, &tTimeout); + dbgoprint((obj_t*) pThis, "end queue persistence run, iRet %d, queue size log %d, phys %d\n", + iRetLocal, getLogicalQueueSize(pThis), getPhysicalQueueSize(pThis)); + if(iRetLocal != RS_RET_OK) { + dbgoprint((obj_t*) pThis, "unexpected iRet state %d after trying to shut down primary queue in disk save mode, " + "continuing, but results are unpredictable\n", iRetLocal); + } + + RETiRet; } @@ -2001,14 +2335,26 @@ BEGINobjDestruct(qqueue) /* be sure to specify the object type also in END and C CODESTARTobjDestruct(qqueue) pThis->bQueueInDestruction = 1; /* indicate we are in destruction (modifies some behaviour) */ - /* shut down all workers (handles *all* of the persistence logic) - * See function head comment of queueShutdownWorkers () on why we don't call it - * We also do not need to shutdown workers when we are in enqueue-only mode or we are a + /* shut down all workers + * We do not need to shutdown workers when we are in enqueue-only mode or we are a * direct queue - because in both cases we have none... ;) * with a child! -- rgerhards, 2008-01-28 */ if(pThis->qType != QUEUETYPE_DIRECT && !pThis->bEnqOnly && pThis->pqParent == NULL) - qqueueShutdownWorkers(pThis); + ShutdownWorkers(pThis); + + /* now all workers are terminated. Messages may exist. Also, some logically dequeued + * messages may never have been processed because their worker was terminated. So + * we need to reset the logical dequeue pointer, persist the queue if configured to do + * so and then destruct everything. 
-- rgerhards, 2009-05-26 + */ +dbgprintf("XXX: pre unDeq disk log %d, phys %d\n", getLogicalQueueSize(pThis), getPhysicalQueueSize(pThis)); + CHKiRet(pThis->qUnDeqAll(pThis)); +dbgprintf("XXX: post unDeq disk log %d, phys %d\n", getLogicalQueueSize(pThis), getPhysicalQueueSize(pThis)); + + if(pThis->bIsDA && getPhysicalQueueSize(pThis) > 0 && pThis->bSaveOnShutdown) { + CHKiRet(DoSaveOnShutdown(pThis)); + } /* finally destruct our (regular) worker thread pool * Note: currently pWtpReg is never NULL, but if we optimize our logic, this may happen, @@ -2063,11 +2409,8 @@ CODESTARTobjDestruct(qqueue) /* type-specific destructor */ iRet = pThis->qDestruct(pThis); - if(pThis->pszFilePrefix != NULL) - free(pThis->pszFilePrefix); - - if(pThis->pszSpoolDir != NULL) - free(pThis->pszSpoolDir); + free(pThis->pszFilePrefix); + free(pThis->pszSpoolDir); ENDobjDestruct(qqueue) @@ -2081,8 +2424,8 @@ qqueueSetFilePrefix(qqueue_t *pThis, uchar *pszPrefix, size_t iLenPrefix) { DEFiRet; - if(pThis->pszFilePrefix != NULL) - free(pThis->pszFilePrefix); + free(pThis->pszFilePrefix); + pThis->pszFilePrefix = NULL; if(pszPrefix == NULL) /* just unset the prefix! */ ABORT_FINALIZE(RS_RET_OK); @@ -2117,111 +2460,7 @@ finalize_it: } -/* enqueue a new user data element - * Enqueues the new element and awakes worker thread. - */ -rsRetVal -qqueueEnqObj(qqueue_t *pThis, flowControl_t flowCtlType, void *pUsr) -{ - DEFiRet; - int iCancelStateSave; - struct timespec t; - - ISOBJ_TYPE_assert(pThis, qqueue); - - /* first check if we need to discard this message (which will cause CHKiRet() to exit) - * rgerhards, 2008-10-07: It is OK to do this outside of mutex protection. The iQueueSize - * and bRunsDA parameters may not reflect the correct settings here, but they are - * "good enough" in the sense that they can be used to drive the decision. Valgrind's - * threading tools may point this access to be an error, but this is done - * intentional. I do not see this causes problems to us. - */ - CHKiRet(qqueueChkDiscardMsg(pThis, pThis->iQueueSize, pThis->bRunsDA, pUsr)); - - /* Please note that this function is not cancel-safe and consequently - * sets the calling thread's cancelibility state to PTHREAD_CANCEL_DISABLE - * during its execution. If that is not done, race conditions occur if the - * thread is canceled (most important use case is input module termination). - * rgerhards, 2008-01-08 - */ - if(pThis->qType != QUEUETYPE_DIRECT) { - pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &iCancelStateSave); - d_pthread_mutex_lock(pThis->mut); - } - - /* then check if we need to add an assistance disk queue */ - if(pThis->bIsDA) - CHKiRet(qqueueChkStrtDA(pThis)); - - /* handle flow control - * There are two different flow control mechanisms: basic and advanced flow control. - * Basic flow control has always been implemented and protects the queue structures - * in that it makes sure no more data is enqueued than the queue is configured to - * support. Enhanced flow control is being added today. There are some sources which - * can easily be stopped, e.g. a file reader. This is the case because it is unlikely - * that blocking those sources will have negative effects (after all, the file is - * continued to be written). Other sources can somewhat be blocked (e.g. the kernel - * log reader or the local log stream reader): in general, nothing is lost if messages - * from these sources are not picked up immediately. 
HOWEVER, they can not block for - * an extended period of time, as this either causes message loss or - even worse - some - * other bad effects (e.g. unresponsive system in respect to the main system log socket). - * Finally, there are some (few) sources which can not be blocked at all. UDP syslog is - * a prime example. If a UDP message is not received, it is simply lost. So we can't - * do anything against UDP sockets that come in too fast. The core idea of advanced - * flow control is that we take into account the different natures of the sources and - * select flow control mechanisms that fit these needs. This also means, in the end - * result, that non-blockable sources like UDP syslog receive priority in the system. - * It's a side effect, but a good one ;) -- rgerhards, 2008-03-14 - */ - if(flowCtlType == eFLOWCTL_FULL_DELAY) { - while(pThis->iQueueSize >= pThis->iFullDlyMrk) { - dbgoprint((obj_t*) pThis, "enqueueMsg: FullDelay mark reached for full delayable message - blocking.\n"); - pthread_cond_wait(&pThis->belowFullDlyWtrMrk, pThis->mut); /* TODO error check? But what do then? */ - } - } else if(flowCtlType == eFLOWCTL_LIGHT_DELAY) { - if(pThis->iQueueSize >= pThis->iLightDlyMrk) { - dbgoprint((obj_t*) pThis, "enqueueMsg: LightDelay mark reached for light delayable message - blocking a bit.\n"); - timeoutComp(&t, 1000); /* 1000 millisconds = 1 second TODO: make configurable */ - pthread_cond_timedwait(&pThis->belowLightDlyWtrMrk, pThis->mut, &t); /* TODO error check? But what do then? */ - } - } - - /* from our regular flow control settings, we are now ready to enqueue the object. - * However, we now need to do a check if the queue permits to add more data. If that - * is not the case, basic flow control enters the field, which means we wait for - * the queue to become ready or drop the new message. -- rgerhards, 2008-03-14 - */ - while( (pThis->iMaxQueueSize > 0 && pThis->iQueueSize >= pThis->iMaxQueueSize) - || (pThis->qType == QUEUETYPE_DISK && pThis->sizeOnDiskMax != 0 - && pThis->tVars.disk.sizeOnDisk > pThis->sizeOnDiskMax)) { - dbgoprint((obj_t*) pThis, "enqueueMsg: queue FULL - waiting to drain.\n"); - timeoutComp(&t, pThis->toEnq); - if(pthread_cond_timedwait(&pThis->notFull, pThis->mut, &t) != 0) { - dbgoprint((obj_t*) pThis, "enqueueMsg: cond timeout, dropping message!\n"); - objDestruct(pUsr); - ABORT_FINALIZE(RS_RET_QUEUE_FULL); - } - } - - /* and finally enqueue the message */ - CHKiRet(qqueueAdd(pThis, pUsr)); - qqueueChkPersist(pThis); - -finalize_it: - if(pThis->qType != QUEUETYPE_DIRECT) { - /* make sure at least one worker is running. */ - qqueueAdviseMaxWorkers(pThis); - /* and release the mutex */ - d_pthread_mutex_unlock(pThis->mut); - pthread_setcancelstate(iCancelStateSave, NULL); - dbgoprint((obj_t*) pThis, "EnqueueMsg advised worker start\n"); - } - - RETiRet; -} - - -/* enqueue a single data object. This currently is a helper to qqueueMultiEnqObj. +/* enqueue a single data object. * Note that the queue mutex MUST already be locked when this function is called. * rgerhards, 2009-06-16 */ @@ -2237,7 +2476,7 @@ doEnqSingleObj(qqueue_t *pThis, flowControl_t flowCtlType, void *pUsr) /* then check if we need to add an assistance disk queue */ if(pThis->bIsDA) - CHKiRet(qqueueChkStrtDA(pThis)); + CHKiRet(ChkStrtDA(pThis)); /* handle flow control * There are two different flow control mechanisms: basic and advanced flow control. 
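
From the input side, the distinction drawn in this comment maps to the flowCtlType argument passed on enqueue: fully delayable sources may block below the full-delay watermark, lightly delayable ones block only briefly, and non-delayable ones are never blocked. A small illustration; the wrapper function and message pointers are hypothetical.

static rsRetVal
submitToQueue(qqueue_t *pQueue, void *pMsgFromFile, void *pMsgFromUDP)
{
        DEFiRet;

        /* a file reader can safely block below the full-delay watermark */
        CHKiRet(qqueueEnqObj(pQueue, eFLOWCTL_FULL_DELAY, pMsgFromFile));

        /* a UDP receiver must never block, or messages are silently lost */
        CHKiRet(qqueueEnqObj(pQueue, eFLOWCTL_NO_DELAY, pMsgFromUDP));

finalize_it:
        RETiRet;
}
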
@@ -2291,7 +2530,6 @@ doEnqSingleObj(qqueue_t *pThis, flowControl_t flowCtlType, void *pUsr) /* and finally enqueue the message */ CHKiRet(qqueueAdd(pThis, pUsr)); - qqueueChkPersist(pThis); // TODO: optimize, do in outer function! (but we need parts from v5?) finalize_it: RETiRet; @@ -2325,6 +2563,8 @@ dbgprintf("queueMultiEnq: %d\n", i); CHKiRet(doEnqSingleObj(pThis, pMultiSub->ppMsgs[i]->flowCtlType, (void*)pMultiSub->ppMsgs[i])); } + qqueueChkPersist(pThis, pMultiSub->nElem); + finalize_it: if(pThis->qType != QUEUETYPE_DIRECT) { /* make sure at least one worker is running. */ @@ -2339,6 +2579,40 @@ finalize_it: } +/* enqueue a new user data element + * Enqueues the new element and awakes worker thread. + */ +rsRetVal +qqueueEnqObj(qqueue_t *pThis, flowControl_t flowCtlType, void *pUsr) +{ + DEFiRet; + int iCancelStateSave; + + ISOBJ_TYPE_assert(pThis, qqueue); + + if(pThis->qType != QUEUETYPE_DIRECT) { + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &iCancelStateSave); + d_pthread_mutex_lock(pThis->mut); + } + + CHKiRet(doEnqSingleObj(pThis, flowCtlType, pUsr)); + + qqueueChkPersist(pThis, 1); + +finalize_it: + if(pThis->qType != QUEUETYPE_DIRECT) { + /* make sure at least one worker is running. */ + qqueueAdviseMaxWorkers(pThis); + /* and release the mutex */ + d_pthread_mutex_unlock(pThis->mut); + pthread_setcancelstate(iCancelStateSave, NULL); + dbgoprint((obj_t*) pThis, "EnqueueMsg advised worker start\n"); + } + + RETiRet; +} + + /* set queue mode to enqueue only or not * There is one subtle issue: this method may be called during queue * construction or while it is running. In the former case, the queue @@ -2348,7 +2622,7 @@ finalize_it: * rgerhards, 2008-01-16 */ static rsRetVal -qqueueSetEnqOnly(qqueue_t *pThis, int bEnqOnly, int bLockMutex) +SetEnqOnly(qqueue_t *pThis, int bEnqOnly, int bLockMutex) { DEFiRet; DEFVARS_mutexProtection; @@ -2410,6 +2684,7 @@ DEFpropSetMeth(qqueue, iMinMsgsPerWrkr, int) DEFpropSetMeth(qqueue, bSaveOnShutdown, int) DEFpropSetMeth(qqueue, pUsr, void*) DEFpropSetMeth(qqueue, iDeqSlowdown, int) +DEFpropSetMeth(qqueue, iDeqBatchSize, int) DEFpropSetMeth(qqueue, sizeOnDiskMax, int64) @@ -2428,8 +2703,6 @@ static rsRetVal qqueueSetProperty(qqueue_t *pThis, var_t *pProp) if(isProp("iQueueSize")) { pThis->iQueueSize = pProp->val.num; - } else if(isProp("iUngottenObjs")) { - pThis->iUngottenObjs = pProp->val.num; } else if(isProp("tVars.disk.sizeOnDisk")) { pThis->tVars.disk.sizeOnDisk = pProp->val.num; } else if(isProp("tVars.disk.bytesRead")) { diff --git a/runtime/queue.h b/runtime/queue.h index 1d82d8d9..e873c456 100644 --- a/runtime/queue.h +++ b/runtime/queue.h @@ -27,8 +27,18 @@ #include <pthread.h> #include "obj.h" #include "wtp.h" +#include "batch.h" #include "stream.h" +/* support for the toDelete list */ +typedef struct toDeleteLst_s toDeleteLst_t; +struct toDeleteLst_s { + qDeqID deqID; + int nElemDeq; /* numbe of elements that were dequeued and as such must now be discarded */ + struct toDeleteLst_s *pNext; +}; + + /* queue types */ typedef enum { QUEUETYPE_FIXED_ARRAY = 0,/* a simple queue made out of a fixed (initially malloced) array fast but memoryhog */ @@ -54,10 +64,12 @@ typedef struct qWrkThrd_s { pthread_mutex_t mut; } qWrkThrd_t; /* type for queue worker threads */ + /* the queue object */ typedef struct queue_s { BEGINobjInstance; queueType_t qType; + int nLogDeq; /* number of elements currently logically dequeued */ bool bEnqOnly; /* does queue run in enqueue-only mode (1) or not (0)? 
*/ bool bSaveOnShutdown;/* persists everthing on shutdown (if DA!)? 1-yes, 0-no */ bool bQueueStarted; /* has queueStart() been called on this queue? 1-yes, 0-no */ @@ -83,7 +95,9 @@ typedef struct queue_s { int toQShutdown; /* timeout for regular queue shutdown in ms */ int toActShutdown; /* timeout for long-running action shutdown in ms */ int toWrkShutdown; /* timeout for idle workers in ms, -1 means indefinite (0 is immediate) */ + toDeleteLst_t *toDeleteLst;/* this queue's to-delete list */ int toEnq; /* enqueue timeout */ + int iDeqBatchSize; /* max number of elements that shall be dequeued at once */ /* rate limiting settings (will be expanded) */ int iDeqSlowdown; /* slow down dequeue by specified nbr of microseconds */ /* end rate limiting */ @@ -97,18 +111,19 @@ typedef struct queue_s { * applied to detect user configuration errors (and tell me how should we detect what * the user really wanted...). -- rgerhards, 2008-04-02 */ - /* ane dequeue time window */ - rsRetVal (*pConsumer)(void *,void*); /* user-supplied consumer function for dequeued messages */ + /* end dequeue time window */ + rsRetVal (*pConsumer)(void *,batch_t*); /* user-supplied consumer function for dequeued messages */ /* calling interface for pConsumer: arg1 is the global user pointer from this structure, arg2 is the - * user pointer that was dequeued (actual sample: for actions, arg1 is the pAction and arg2 is pointer - * to message) - * rgerhards, 2008-01-28 + * user pointer array that was dequeued (actual sample: for actions, arg1 is the pAction and arg2 + * is pointer to an array of message message pointers) */ /* type-specific handlers (set during construction) */ rsRetVal (*qConstruct)(struct queue_s *pThis); rsRetVal (*qDestruct)(struct queue_s *pThis); rsRetVal (*qAdd)(struct queue_s *pThis, void *pUsr); - rsRetVal (*qDel)(struct queue_s *pThis, void **ppUsr); + rsRetVal (*qDeq)(struct queue_s *pThis, void **ppUsr); + rsRetVal (*qDel)(struct queue_s *pThis); + rsRetVal (*qUnDeqAll)(struct queue_s *pThis); /* end type-specific handler */ /* synchronization variables */ pthread_mutex_t mutThrdMgmt; /* mutex for the queue's thread management */ @@ -132,32 +147,30 @@ typedef struct queue_s { int iNumberFiles; /* how many files make up the queue? */ int64 iMaxFileSize; /* max size for a single queue file */ int64 sizeOnDiskMax; /* maximum size on disk allowed */ + qDeqID deqIDAdd; /* next dequeue ID to use during add to queue store */ + qDeqID deqIDDel; /* queue store delete position */ int bIsDA; /* is this queue disk assisted? */ int bRunsDA; /* is this queue actually *running* disk assisted? */ struct queue_s *pqDA; /* queue for disk-assisted modes */ struct queue_s *pqParent;/* pointer to the parent (if this is a child queue) */ int bDAEnqOnly; /* EnqOnly setting for DA queue */ - /* some data elements for the queueUngetObj() functionality. 
This list should always be short - * and is always kept in memory - */ - qLinkedList_t *pUngetRoot; - qLinkedList_t *pUngetLast; - int iUngottenObjs; /* number of objects currently in the "ungotten" list */ /* now follow queueing mode specific data elements */ union { /* different data elements based on queue type (qType) */ struct { - long head, tail; + long deqhead, head, tail; void** pBuf; /* the queued user data structure */ } farray; struct { - qLinkedList_t *pRoot; + qLinkedList_t *pDeqRoot; + qLinkedList_t *pDelRoot; qLinkedList_t *pLast; } linklist; struct { int64 sizeOnDisk; /* current amount of disk space used */ int64 bytesRead; /* number of bytes read from current (undeleted!) file */ - strm_t *pWrite; /* current file to be written */ - strm_t *pRead; /* current file to be read */ + strm_t *pWrite; /* current file to be written */ + strm_t *pReadDeq; /* current file for dequeueing */ + strm_t *pReadDel; /* current file for deleting */ } disk; } tVars; } qqueue_t; @@ -184,7 +197,7 @@ rsRetVal qqueueStart(qqueue_t *pThis); rsRetVal qqueueSetMaxFileSize(qqueue_t *pThis, size_t iMaxFileSize); rsRetVal qqueueSetFilePrefix(qqueue_t *pThis, uchar *pszPrefix, size_t iLenPrefix); rsRetVal qqueueConstruct(qqueue_t **ppThis, queueType_t qType, int iWorkerThreads, - int iMaxQueueSize, rsRetVal (*pConsumer)(void*,void*)); + int iMaxQueueSize, rsRetVal (*pConsumer)(void*,batch_t*)); PROTOTYPEObjClassInit(qqueue); PROTOTYPEpropSetMeth(qqueue, iPersistUpdCnt, int); PROTOTYPEpropSetMeth(qqueue, bSyncQueueFiles, int); @@ -203,6 +216,7 @@ PROTOTYPEpropSetMeth(qqueue, bSaveOnShutdown, int); PROTOTYPEpropSetMeth(qqueue, pUsr, void*); PROTOTYPEpropSetMeth(qqueue, iDeqSlowdown, int); PROTOTYPEpropSetMeth(qqueue, sizeOnDiskMax, int64); +PROTOTYPEpropSetMeth(qqueue, iDeqBatchSize, int); #define qqueueGetID(pThis) ((unsigned long) pThis) #endif /* #ifndef QUEUE_H_INCLUDED */ diff --git a/runtime/rsyslog.h b/runtime/rsyslog.h index 8a043dde..71cb9cf2 100644 --- a/runtime/rsyslog.h +++ b/runtime/rsyslog.h @@ -69,8 +69,24 @@ #endif +/* the rsyslog core provides information about present feature to plugins + * asking it. Below are feature-test macros which must be used to query + * features. Note that this must be powers of two, so that multiple queries + * can be combined. -- rgerhards, 2009-04-27 + */ +#define CORE_FEATURE_BATCHING 1 +/*#define CORE_FEATURE_whatever 2 ... and so on ... */ + +/* some universal fixed size integer defines ... */ +typedef long long int64; +typedef long long unsigned uint64; +typedef int64 number_t; /* type to use for numbers - TODO: maybe an autoconf option? */ +typedef char intTiny; /* 0..127! */ +typedef unsigned char uintTiny; /* 0..255! 
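
Because the CORE_FEATURE_* constants are powers of two, a plugin can test several of them with a single bitwise operation against the feature mask it obtains from the core. A tiny illustration; how the mask is actually queried lies outside this diff, so the parameter below is just a stand-in.

/* returns nonzero if the core reports batching support (mask is a stand-in
 * for whatever the core's feature query hands back)
 */
static int
coreSupportsBatching(unsigned coreFeatures)
{
        return (coreFeatures & CORE_FEATURE_BATCHING) != 0;
}
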
*/ + /* define some base data types */ typedef unsigned char uchar;/* get rid of the unhandy "unsigned char" */ +typedef struct aUsrp_s aUsrp_t; typedef struct thrdInfo thrdInfo_t; typedef struct obj_s obj_t; typedef struct ruleset_s ruleset_t; @@ -87,6 +103,7 @@ typedef struct nsd_gsspi_s nsd_gsspi_t; typedef struct nsd_nss_s nsd_nss_t; typedef struct nsdsel_ptcp_s nsdsel_ptcp_t; typedef struct nsdsel_gtls_s nsdsel_gtls_t; +typedef struct wti_s wti_t; typedef obj_t nsd_t; typedef obj_t nsdsel_t; typedef struct msg msg_t; @@ -102,18 +119,15 @@ typedef struct tcps_sess_s tcps_sess_t; typedef struct strmsrv_s strmsrv_t; typedef struct strms_sess_s strms_sess_t; typedef struct vmstk_s vmstk_t; +typedef struct batch_obj_s batch_obj_t; +typedef struct batch_s batch_t; +typedef struct wtp_s wtp_t; typedef rsRetVal (*prsf_t)(struct vmstk_s*, int); /* pointer to a RainerScript function */ +typedef uint64 qDeqID; /* queue Dequeue order ID. 32 bits is considered dangerously few */ typedef struct tcpLstnPortList_s tcpLstnPortList_t; // TODO: rename? typedef struct strmLstnPortList_s strmLstnPortList_t; // TODO: rename? -/* some universal 64 bit define... */ -typedef long long int64; -typedef long long unsigned uint64; -typedef int64 number_t; /* type to use for numbers - TODO: maybe an autoconf option? */ -typedef char intTiny; /* 0..127! */ -typedef uchar uintTiny; /* 0..255! */ - #ifdef __hpux typedef unsigned int u_int32_t; /* TODO: is this correct? */ typedef int socklen_t; @@ -367,15 +381,20 @@ enum rsRetVal_ /** return value. All methods return this if not specified oth RS_RET_FILENAME_INVALID = -2140, /**< filename invalid, not found, no access, ... */ RS_RET_ZLIB_ERR = -2141, /**< error during zlib call */ RS_RET_VAR_NOT_FOUND = -2142, /**< variable not found */ + RS_RET_NO_SRCNAME_TPL = -2143, /**< sourcename template was not specified where one was needed (omudpspoof spoof addr) */ + RS_RET_HOST_NOT_SPECIFIED = -2144, /**< (target) host was not specified where it was needed */ + RS_RET_ERR_LIBNET_INIT = -2145, /**< error initializing libnet */ /* RainerScript error messages (range 1000.. 1999) */ RS_RET_SYSVAR_NOT_FOUND = 1001, /**< system variable could not be found (maybe misspelled) */ /* some generic error/status codes */ + RS_RET_OK = 0, /**< operation successful */ RS_RET_OK_DELETE_LISTENTRY = 1, /**< operation successful, but callee requested the deletion of an entry (special state) */ RS_RET_TERMINATE_NOW = 2, /**< operation successful, function is requested to terminate (mostly used with threads) */ RS_RET_NO_RUN = 3, /**< operation successful, but function does not like to be executed */ - RS_RET_OK = 0 /**< operation successful */ + RS_RET_IDLE = 4, /**< operation successful, but callee is idle (e.g. because queue is empty) */ + RS_RET_TERMINATE_WHEN_IDLE = 5 /**< operation successful, function is requested to terminate when idle */ }; /* some helpful macros to work with srRetVals. 
diff --git a/runtime/srUtils.h b/runtime/srUtils.h index 16766312..6d5a784b 100644 --- a/runtime/srUtils.h +++ b/runtime/srUtils.h @@ -116,11 +116,13 @@ rsRetVal getFileSize(uchar *pszName, off_t *pSize); if(bMustLock == LOCK_MUTEX) { \ pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &iCancelStateSave); \ d_pthread_mutex_lock(mut); \ + assert(bLockedOpIsLocked == 0); \ bLockedOpIsLocked = 1; \ } #define END_MTX_PROTECTED_OPERATIONS(mut) \ if(bLockedOpIsLocked) { \ d_pthread_mutex_unlock(mut); \ + bLockedOpIsLocked = 0; \ pthread_setcancelstate(iCancelStateSave, NULL); \ } diff --git a/runtime/stream.c b/runtime/stream.c index a0571a61..4f611a62 100644 --- a/runtime/stream.c +++ b/runtime/stream.c @@ -243,7 +243,7 @@ static rsRetVal strmOpenFile(strm_t *pThis) pThis->pszFName, pThis->lenFName, pThis->iCurrFNum, pThis->iFileNumDigits)); } else { if(pThis->pszDir == NULL) { - if((pThis->pszCurrFName = (uchar*) strdup((char*) pThis->pszFName)) == NULL) + if((pThis->pszCurrFName = ustrdup(pThis->pszFName)) == NULL) ABORT_FINALIZE(RS_RET_OUT_OF_MEMORY); } else { CHKiRet(genFileName(&pThis->pszCurrFName, pThis->pszDir, pThis->lenDir, @@ -1425,6 +1425,46 @@ finalize_it: } +/* duplicate a stream object excluding dynamic properties. This function is + * primarily meant to provide a duplicate that later on can be used to access + * the data. This is needed, for example, for a restart of the disk queue. + * Note that ConstructFinalize() is NOT called. So our caller may change some + * properties before finalizing things. + * rgerhards, 2009-05-26 + */ +rsRetVal +strmDup(strm_t *pThis, strm_t **ppNew) +{ + strm_t *pNew = NULL; + DEFiRet; + + ISOBJ_TYPE_assert(pThis, strm); + assert(ppNew != NULL); + + CHKiRet(strmConstruct(&pNew)); + pNew->sType = pThis->sType; + pNew->iCurrFNum = pThis->iCurrFNum; + CHKmalloc(pNew->pszFName = ustrdup(pThis->pszFName)); + pNew->lenFName = pThis->lenFName; + CHKmalloc(pNew->pszDir = ustrdup(pThis->pszDir)); + pNew->lenDir = pThis->lenDir; + pNew->tOperationsMode = pThis->tOperationsMode; + pNew->tOpenMode = pThis->tOpenMode; + pNew->iMaxFileSize = pThis->iMaxFileSize; + pNew->iMaxFiles = pThis->iMaxFiles; + pNew->iFileNumDigits = pThis->iFileNumDigits; + pNew->bDeleteOnClose = pThis->bDeleteOnClose; + pNew->iCurrOffs = pThis->iCurrOffs; + + *ppNew = pNew; + pNew = NULL; + +finalize_it: + if(pNew != NULL) + strmDestruct(&pNew); + + RETiRet; +} /* set a user write-counter. This counter is initialized to zero and * receives the number of bytes written. It is accurate only after a @@ -1540,6 +1580,7 @@ CODESTARTobjQueryInterface(strm) pIf->RecordEnd = strmRecordEnd; pIf->Serialize = strmSerialize; pIf->GetCurrOffset = strmGetCurrOffset; + pIf->Dup = strmDup; pIf->SetWCntr = strmSetWCntr; /* set methods */ pIf->SetbDeleteOnClose = strmSetbDeleteOnClose; diff --git a/runtime/stream.h b/runtime/stream.h index cb368835..c251e5c4 100644 --- a/runtime/stream.h +++ b/runtime/stream.h @@ -169,6 +169,7 @@ BEGINinterface(strm) /* name must also be changed in ENDinterface macro! */ rsRetVal (*Serialize)(strm_t *pThis, strm_t *pStrm); rsRetVal (*GetCurrOffset)(strm_t *pThis, int64 *pOffs); rsRetVal (*SetWCntr)(strm_t *pThis, number_t *pWCnt); + rsRetVal (*Dup)(strm_t *pThis, strm_t **ppNew); INTERFACEpropSetMeth(strm, bDeleteOnClose, int); INTERFACEpropSetMeth(strm, iMaxFileSize, int); INTERFACEpropSetMeth(strm, iMaxFiles, int); @@ -183,7 +184,7 @@ BEGINinterface(strm) /* name must also be changed in ENDinterface macro! 
*/ INTERFACEpropSetMeth(strm, iFlushInterval, int); INTERFACEpropSetMeth(strm, pszSizeLimitCmd, uchar*); ENDinterface(strm) -#define strmCURR_IF_VERSION 2 /* increment whenever you change the interface structure! */ +#define strmCURR_IF_VERSION 5 /* increment whenever you change the interface structure! */ /* prototypes */ diff --git a/runtime/wti.c b/runtime/wti.c index 156d8116..93c66028 100644 --- a/runtime/wti.c +++ b/runtime/wti.c @@ -39,10 +39,10 @@ #include <pthread.h> #include <errno.h> -#ifdef OS_SOLARIS -# include <sched.h> -# define pthread_yield() sched_yield() -#endif +/// TODO: check on solaris if this is any longer needed - I don't think so - rgerhards, 2009-09-20 +//#ifdef OS_SOLARIS +//# include <sched.h> +//#endif #include "rsyslog.h" #include "stringbuf.h" @@ -147,11 +147,12 @@ wtiSetState(wti_t *pThis, qWrkCmd_t tCmd, int bActiveOnly, int bLockMutex) break; } /* apply the new state */ +dbgprintf("worker terminator will write stateval %d\n", tCmd); unsigned val = ATOMIC_CAS_VAL(pThis->tCurrCmd, tCurrCmd, tCmd); if(val != tCurrCmd) { DBGPRINTF("wtiSetState PROBLEM, tCurrCmd %d overwritten with %d, wanted to set %d\n", tCurrCmd, val, tCmd); } - +//dbgprintf("worker terminator has written stateval %d\n", tCmd); } END_MTX_PROTECTED_OPERATIONS(&pThis->mut); @@ -211,6 +212,7 @@ CODESTARTobjDestruct(wti) pthread_cond_destroy(&pThis->condExitDone); pthread_mutex_destroy(&pThis->mut); + free(pThis->batch.pElem); free(pThis->pszDbgHdr); ENDobjDestruct(wti) @@ -230,15 +232,20 @@ rsRetVal wtiConstructFinalize(wti_t *pThis) { DEFiRet; + int iDeqBatchSize; ISOBJ_TYPE_assert(pThis, wti); dbgprintf("%s: finalizing construction of worker instance data\n", wtiGetDbgHdr(pThis)); /* initialize our thread instance descriptor */ - pThis->pUsrp = NULL; pThis->tCurrCmd = eWRKTHRD_STOPPED; + /* we now alloc the array for user pointers. We obtain the max from the queue itself. */ + CHKiRet(pThis->pWtp->pfGetDeqBatchSize(pThis->pWtp->pUsr, &iDeqBatchSize)); + CHKmalloc(pThis->batch.pElem = calloc((size_t)iDeqBatchSize, sizeof(batch_obj_t))); + +finalize_it: RETiRet; } @@ -256,7 +263,7 @@ wtiJoinThrd(wti_t *pThis) if (pThis->thrdID == 0) { dbgprintf("worker %s was already stopped\n", wtiGetDbgHdr(pThis)); } else { - pthread_join(pThis->thrdID, NULL); + //pthread_join(pThis->thrdID, NULL); wtiSetState(pThis, eWRKTHRD_STOPPED, 0, MUTEX_ALREADY_LOCKED); /* back to virgin... */ pThis->thrdID = 0; /* invalidate the thread ID so that we do not accidently find reused ones */ dbgprintf("worker %s has stopped\n", wtiGetDbgHdr(pThis)); @@ -322,7 +329,7 @@ wtiWorkerCancelCleanup(void *arg) DBGPRINTF("%s: cancelation cleanup handler called.\n", wtiGetDbgHdr(pThis)); /* call user supplied handler (that one e.g. requeues the element) */ - pWtp->pfOnWorkerCancel(pThis->pWtp->pUsr, pThis->pUsrp); + pWtp->pfOnWorkerCancel(pThis->pWtp->pUsr, pThis->batch.pElem[0].pUsrp); pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &iCancelStateSave); d_pthread_mutex_lock(&pWtp->mut); @@ -336,75 +343,108 @@ wtiWorkerCancelCleanup(void *arg) } +/* wait for queue to become non-empty or timeout + * helper to wtiWorker + * IMPORTANT: mutex must be locked when this code is called! 
+ * rgerhards, 2009-05-20 + */ +static inline void +doIdleProcessing(wti_t *pThis, wtp_t *pWtp, int *pbInactivityTOOccured) +{ + struct timespec t; + + BEGINfunc + DBGPRINTF("%s: worker IDLE, waiting for work.\n", wtiGetDbgHdr(pThis)); + pWtp->pfOnIdle(pWtp->pUsr, MUTEX_ALREADY_LOCKED); + + if(pWtp->toWrkShutdown == -1) { + /* never shut down any started worker */ + d_pthread_cond_wait(pWtp->pcondBusy, pWtp->pmutUsr); + } else { + timeoutComp(&t, pWtp->toWrkShutdown);/* get absolute timeout */ + if(d_pthread_cond_timedwait(pWtp->pcondBusy, pWtp->pmutUsr, &t) != 0) { + DBGPRINTF("%s: inactivity timeout, worker terminating...\n", wtiGetDbgHdr(pThis)); + *pbInactivityTOOccured = 1; /* indicate we had a timeout */ + } + } + ENDfunc +} + + /* generic worker thread framework */ #pragma GCC diagnostic ignored "-Wempty-body" rsRetVal wtiWorker(wti_t *pThis) { - DEFiRet; - DEFVARS_mutexProtection; - struct timespec t; + DEFVARS_mutexProtection_uncond; wtp_t *pWtp; /* our worker thread pool */ int bInactivityTOOccured = 0; + rsRetVal localRet; + rsRetVal terminateRet; + bool bMutexIsLocked; + DEFiRet; ISOBJ_TYPE_assert(pThis, wti); pWtp = pThis->pWtp; /* shortcut */ ISOBJ_TYPE_assert(pWtp, wtp); dbgSetThrdName(pThis->pszDbgHdr); - pThis->pUsrp = NULL; pthread_cleanup_push(wtiWorkerCancelCleanup, pThis); - BEGIN_MTX_PROTECTED_OPERATIONS(pWtp->pmutUsr, LOCK_MUTEX); + BEGIN_MTX_PROTECTED_OPERATIONS_UNCOND(pWtp->pmutUsr); pWtp->pfOnWorkerStartup(pWtp->pUsr); - END_MTX_PROTECTED_OPERATIONS(pWtp->pmutUsr); + END_MTX_PROTECTED_OPERATIONS_UNCOND(pWtp->pmutUsr); /* now we have our identity, on to real processing */ while(1) { /* loop will be broken below - need to do mutex locks */ /* process any pending thread requests */ - wtpProcessThrdChanges(pWtp); + // wtpProcessThrdChanges(pWtp); - /* if we have a rate-limiter set for this worker pool, let's call it. Please - * keep in mind that the rate-limiter may hold us for an extended period - * of time. -- rgerhards, 2008-04-02 - */ - if(pWtp->pfRateLimiter != NULL) { + if(pWtp->pfRateLimiter != NULL) { /* call rate-limiter, if defined */ pWtp->pfRateLimiter(pWtp->pUsr); } wtpSetInactivityGuard(pThis->pWtp, 0, LOCK_MUTEX); /* must be set before usr mutex is locked! 
*/ - BEGIN_MTX_PROTECTED_OPERATIONS(pWtp->pmutUsr, LOCK_MUTEX); - - if( (bInactivityTOOccured && pWtp->pfIsIdle(pWtp->pUsr, MUTEX_ALREADY_LOCKED)) - || wtpChkStopWrkr(pWtp, LOCK_MUTEX, MUTEX_ALREADY_LOCKED)) { - END_MTX_PROTECTED_OPERATIONS(pWtp->pmutUsr); - break; /* end worker thread run */ + BEGIN_MTX_PROTECTED_OPERATIONS_UNCOND(pWtp->pmutUsr); + bMutexIsLocked = TRUE; + + /* first check if we are in shutdown process (but evaluate a bit later) */ + terminateRet = wtpChkStopWrkr(pWtp, LOCK_MUTEX, MUTEX_ALREADY_LOCKED); + if(terminateRet == RS_RET_TERMINATE_NOW) { + /* we now need to free the old batch */ + localRet = pWtp->pfObjProcessed(pWtp->pUsr, pThis); + dbgoprint((obj_t*) pThis, "terminating worker because of TERMINATE_NOW mode, del iRet %d\n", + localRet); + break; } - bInactivityTOOccured = 0; /* reset for next run */ - /* if we reach this point, we are still protected by the mutex */ - - if(pWtp->pfIsIdle(pWtp->pUsr, MUTEX_ALREADY_LOCKED)) { - DBGPRINTF("%s: worker IDLE, waiting for work.\n", wtiGetDbgHdr(pThis)); - pWtp->pfOnIdle(pWtp->pUsr, MUTEX_ALREADY_LOCKED); - - if(pWtp->toWrkShutdown == -1) { - /* never shut down any started worker */ - d_pthread_cond_wait(pWtp->pcondBusy, pWtp->pmutUsr); - } else { - timeoutComp(&t, pWtp->toWrkShutdown);/* get absolute timeout */ - if(d_pthread_cond_timedwait(pWtp->pcondBusy, pWtp->pmutUsr, &t) != 0) { - DBGPRINTF("%s: inactivity timeout, worker terminating...\n", wtiGetDbgHdr(pThis)); - bInactivityTOOccured = 1; /* indicate we had a timeout */ - } + /* try to execute and process whatever we have */ + /* This function must and does RELEASE the MUTEX! */ + localRet = pWtp->pfDoWork(pWtp->pUsr, pThis, iCancelStateSave); + bMutexIsLocked = FALSE; + + if(localRet == RS_RET_IDLE) { + if(terminateRet == RS_RET_TERMINATE_WHEN_IDLE) { + break; /* end of loop */ + } + + if(bInactivityTOOccured) { + /* we had an inactivity timeout in the last run and are still idle, so it is time to exit... */ + break; /* end worker thread run */ } - END_MTX_PROTECTED_OPERATIONS(pWtp->pmutUsr); + BEGIN_MTX_PROTECTED_OPERATIONS_UNCOND(pWtp->pmutUsr); + doIdleProcessing(pThis, pWtp, &bInactivityTOOccured); + END_MTX_PROTECTED_OPERATIONS_UNCOND(pWtp->pmutUsr); continue; /* request next iteration */ } - /* if we reach this point, we have a non-empty queue (and are still protected by mutex) */ - pWtp->pfDoWork(pWtp->pUsr, pThis, iCancelStateSave); + bInactivityTOOccured = 0; /* reset for next run */ + } + + /* if we exit the loop, the mutex may be locked and, if so, must be unlocked */ + if(bMutexIsLocked) { + END_MTX_PROTECTED_OPERATIONS_UNCOND(pWtp->pmutUsr); } /* indicate termination */ @@ -412,6 +452,7 @@ wtiWorker(wti_t *pThis) d_pthread_mutex_lock(&pThis->mut); pthread_cleanup_pop(0); /* remove cleanup handler */ +RUNLOG_STR("XXX: Worker shutdown"); pWtp->pfOnWorkerShutdown(pWtp->pUsr); wtiSetState(pThis, eWRKTHRD_TERMINATING, 0, MUTEX_ALREADY_LOCKED); diff --git a/runtime/wti.h b/runtime/wti.h index 72653b15..2acd3cf6 100644 --- a/runtime/wti.h +++ b/runtime/wti.h @@ -1,6 +1,6 @@ /* Definition of the worker thread instance (wti) class. * - * Copyright 2008 Rainer Gerhards and Adiscon GmbH. + * Copyright 2008, 2009 by Rainer Gerhards and Adiscon GmbH. * * This file is part of the rsyslog runtime library. 
* @@ -27,19 +27,21 @@ #include <pthread.h> #include "wtp.h" #include "obj.h" +#include "batch.h" + /* the worker thread instance class */ -typedef struct wti_s { +struct wti_s { BEGINobjInstance; pthread_t thrdID; /* thread ID */ qWrkCmd_t tCurrCmd; /* current command to be carried out by worker */ - obj_t *pUsrp; /* pointer to an object meaningful for current user pointer (e.g. queue pUsr data elemt) */ wtp_t *pWtp; /* my worker thread pool (important if only the work thread instance is passed! */ pthread_cond_t condExitDone; /* signaled when the thread exit is done (once per thread existance) */ pthread_mutex_t mut; + batch_t batch; /* pointer to an object array meaningful for current user pointer (e.g. queue pUsr data elemt) */ bool bShutdownRqtd; /* shutdown for this thread requested? 0 - no , 1 - yes */ uchar *pszDbgHdr; /* header string for debug messages */ -} wti_t; +}; /* some symbolic constants for easier reference */ diff --git a/runtime/wtp.c b/runtime/wtp.c index ba1f94b3..59553984 100644 --- a/runtime/wtp.c +++ b/runtime/wtp.c @@ -42,10 +42,10 @@ #include <atomic.h> #include <sys/prctl.h> -#ifdef OS_SOLARIS -# include <sched.h> -# define pthread_yield() sched_yield() -#endif +/// TODO: check on solaris if this is any longer needed - I don't think so - rgerhards, 2009-09-20 +//#ifdef OS_SOLARIS +//# include <sched.h> +//#endif #include "rsyslog.h" #include "stringbuf.h" @@ -81,7 +81,7 @@ wtpGetDbgHdr(wtp_t *pThis) /* Not implemented dummy function for constructor */ -static rsRetVal NotImplementedDummy() { return RS_RET_OK; } +static rsRetVal NotImplementedDummy() { return RS_RET_NOT_IMPLEMENTED; } /* Standard-Constructor for the wtp object */ BEGINobjConstruct(wtp) /* be sure to specify the object type also in END macro! */ @@ -90,12 +90,15 @@ BEGINobjConstruct(wtp) /* be sure to specify the object type also in END macro! pthread_cond_init(&pThis->condThrdTrm, NULL); /* set all function pointers to "not implemented" dummy so that we can safely call them */ pThis->pfChkStopWrkr = NotImplementedDummy; + pThis->pfGetDeqBatchSize = NotImplementedDummy; pThis->pfIsIdle = NotImplementedDummy; pThis->pfDoWork = NotImplementedDummy; + pThis->pfObjProcessed = NotImplementedDummy; pThis->pfOnIdle = NotImplementedDummy; pThis->pfOnWorkerCancel = NotImplementedDummy; pThis->pfOnWorkerStartup = NotImplementedDummy; pThis->pfOnWorkerShutdown = NotImplementedDummy; +dbgprintf("XXX: wtpConstruct: %d\n", pThis->wtpState); ENDobjConstruct(wtp) @@ -119,7 +122,7 @@ wtpConstructFinalize(wtp_t *pThis) */ if((pThis->pWrkr = malloc(sizeof(wti_t*) * pThis->iNumWorkerThreads)) == NULL) ABORT_FINALIZE(RS_RET_OUT_OF_MEMORY); - + for(i = 0 ; i < pThis->iNumWorkerThreads ; ++i) { CHKiRet(wtiConstruct(&pThis->pWrkr[i])); pWti = pThis->pWrkr[i]; @@ -249,7 +252,6 @@ wtpSetState(wtp_t *pThis, wtpState_t iNewState) /* check if the worker shall shutdown (1 = yes, 0 = no) - * TODO: check if we can use atomic operations to enhance performance * Note: there may be two mutexes locked, the bLockUsrMutex is the one in our "user" * (e.g. 
the queue clas) * rgerhards, 2008-01-21 @@ -263,16 +265,19 @@ wtpChkStopWrkr(wtp_t *pThis, int bLockMutex, int bLockUsrMutex) ISOBJ_TYPE_assert(pThis, wtp); BEGIN_MTX_PROTECTED_OPERATIONS(&pThis->mut, bLockMutex); - if( (pThis->wtpState == wtpState_SHUTDOWN_IMMEDIATE) - || ((pThis->wtpState == wtpState_SHUTDOWN) && pThis->pfIsIdle(pThis->pUsr, bLockUsrMutex))) - iRet = RS_RET_TERMINATE_NOW; - END_MTX_PROTECTED_OPERATIONS(&pThis->mut); + if(pThis->wtpState == wtpState_SHUTDOWN_IMMEDIATE) { + ABORT_FINALIZE(RS_RET_TERMINATE_NOW); + } else if(pThis->wtpState == wtpState_SHUTDOWN) { + ABORT_FINALIZE(RS_RET_TERMINATE_WHEN_IDLE); + } /* try customer handler if one was set and we do not yet have a definite result */ - if(iRet == RS_RET_OK && pThis->pfChkStopWrkr != NULL) { + if(pThis->pfChkStopWrkr != NULL) { iRet = pThis->pfChkStopWrkr(pThis->pUsr, bLockUsrMutex); } +finalize_it: + END_MTX_PROTECTED_OPERATIONS(&pThis->mut); RETiRet; } @@ -488,11 +493,12 @@ wtpWorker(void *arg) /* the arg is actually a wti object, even though we are in static rsRetVal wtpStartWrkr(wtp_t *pThis, int bLockMutex) { - DEFiRet; DEFVARS_mutexProtection; wti_t *pWti; int i; int iState; + pthread_attr_t attr; + DEFiRet; ISOBJ_TYPE_assert(pThis, wtp); @@ -516,7 +522,10 @@ wtpStartWrkr(wtp_t *pThis, int bLockMutex) pWti = pThis->pWrkr[i]; wtiSetState(pWti, eWRKTHRD_RUN_CREATED, 0, LOCK_MUTEX); - iState = pthread_create(&(pWti->thrdID), NULL, wtpWorker, (void*) pWti); + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + iState = pthread_create(&(pWti->thrdID), &attr, wtpWorker, (void*) pWti); + pthread_attr_destroy(&attr); /* TODO: we could globally reuse such an attribute 2009-07-08 */ dbgprintf("%s: started with state %d, num workers now %d\n", wtpGetDbgHdr(pThis), iState, pThis->iCurNumWrkThrd); @@ -586,8 +595,10 @@ DEFpropSetMethPTR(wtp, pmutUsr, pthread_mutex_t) DEFpropSetMethPTR(wtp, pcondBusy, pthread_cond_t) DEFpropSetMethFP(wtp, pfChkStopWrkr, rsRetVal(*pVal)(void*, int)) DEFpropSetMethFP(wtp, pfRateLimiter, rsRetVal(*pVal)(void*)) -DEFpropSetMethFP(wtp, pfIsIdle, rsRetVal(*pVal)(void*, int)) +DEFpropSetMethFP(wtp, pfGetDeqBatchSize, rsRetVal(*pVal)(void*, int*)) +DEFpropSetMethFP(wtp, pfIsIdle, rsRetVal(*pVal)(void*, wtp_t*)) DEFpropSetMethFP(wtp, pfDoWork, rsRetVal(*pVal)(void*, void*, int)) +DEFpropSetMethFP(wtp, pfObjProcessed, rsRetVal(*pVal)(void*, wti_t*)) DEFpropSetMethFP(wtp, pfOnIdle, rsRetVal(*pVal)(void*, int)) DEFpropSetMethFP(wtp, pfOnWorkerCancel, rsRetVal(*pVal)(void*, void*)) DEFpropSetMethFP(wtp, pfOnWorkerStartup, rsRetVal(*pVal)(void*)) diff --git a/runtime/wtp.h b/runtime/wtp.h index 1ce171cc..ef61d7e6 100644 --- a/runtime/wtp.h +++ b/runtime/wtp.h @@ -50,7 +50,7 @@ typedef enum { /* the worker thread pool (wtp) object */ -typedef struct wtp_s { +struct wtp_s { BEGINobjInstance; wtpState_t wtpState; int iNumWorkerThreads;/* number of worker threads to use */ @@ -66,12 +66,14 @@ typedef struct wtp_s { int bThrdStateChanged; /* at least one thread state has changed if 1 */ /* end sync variables */ /* user objects */ - void *pUsr; /* pointer to user object */ + void *pUsr; /* pointer to user object (in this case, the queue the wtp belongs to) */ pthread_mutex_t *pmutUsr; pthread_cond_t *pcondBusy; /* condition the user will signal "busy again, keep runing" on (awakes worker) */ rsRetVal (*pfChkStopWrkr)(void *pUsr, int); + rsRetVal (*pfGetDeqBatchSize)(void *pUsr, int*); /* obtains max dequeue count from queue config */ + rsRetVal (*pfObjProcessed)(void 
*pUsr, wti_t *pWti); /* indicate user object is processed */ rsRetVal (*pfRateLimiter)(void *pUsr); - rsRetVal (*pfIsIdle)(void *pUsr, int); + rsRetVal (*pfIsIdle)(void *pUsr, wtp_t *pWtp); rsRetVal (*pfDoWork)(void *pUsr, void *pWti, int); rsRetVal (*pfOnIdle)(void *pUsr, int); rsRetVal (*pfOnWorkerCancel)(void *pUsr, void*pWti); @@ -79,7 +81,7 @@ rsRetVal (*pfOnWorkerShutdown)(void *pUsr); /* end user objects */ uchar *pszDbgHdr; /* header string for debug messages */ -} wtp_t; +}; /* some symbolic constants for easier reference */ @@ -103,8 +105,10 @@ int wtpGetCurNumWrkr(wtp_t *pThis, int bLockMutex); PROTOTYPEObjClassInit(wtp); PROTOTYPEpropSetMethFP(wtp, pfChkStopWrkr, rsRetVal(*pVal)(void*, int)); PROTOTYPEpropSetMethFP(wtp, pfRateLimiter, rsRetVal(*pVal)(void*)); -PROTOTYPEpropSetMethFP(wtp, pfIsIdle, rsRetVal(*pVal)(void*, int)); +PROTOTYPEpropSetMethFP(wtp, pfGetDeqBatchSize, rsRetVal(*pVal)(void*, int*)); +PROTOTYPEpropSetMethFP(wtp, pfIsIdle, rsRetVal(*pVal)(void*, wtp_t*)); PROTOTYPEpropSetMethFP(wtp, pfDoWork, rsRetVal(*pVal)(void*, void*, int)); +PROTOTYPEpropSetMethFP(wtp, pfObjProcessed, rsRetVal(*pVal)(void*, wti_t*)); PROTOTYPEpropSetMethFP(wtp, pfOnIdle, rsRetVal(*pVal)(void*, int)); PROTOTYPEpropSetMethFP(wtp, pfOnWorkerCancel, rsRetVal(*pVal)(void*,void*)); PROTOTYPEpropSetMethFP(wtp, pfOnWorkerStartup, rsRetVal(*pVal)(void*));
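
For readers following the pConsumer signature change in queue.h (the callback now receives a batch_t* instead of a single user pointer), here is a minimal stand-alone sketch of what a batch-oriented consumer can look like. It is illustrative only and not rsyslog code: the type definitions are simplified stand-ins, and while pElem and pUsrp are visible in this patch (see the wti.c hunks), the nElem field and the exampleConsumer function are assumptions made purely for this sketch.

#include <stdio.h>

typedef int rsRetVal;
enum { RS_RET_OK = 0 };

typedef struct {
	void *pUsrp;            /* dequeued user object, typically a message */
} batch_obj_t;

typedef struct {
	int nElem;              /* assumed: number of valid entries in pElem */
	batch_obj_t *pElem;     /* array sized from the queue's iDeqBatchSize */
} batch_t;

/* hypothetical consumer: arg1 is the queue's global user pointer (for
 * actions, the pAction), arg2 the array of dequeued user pointers.
 */
static rsRetVal
exampleConsumer(void *pUsr, batch_t *pBatch)
{
	int i;
	(void) pUsr;
	for(i = 0 ; i < pBatch->nElem ; ++i)
		printf("processing element %d: %s\n", i, (char*) pBatch->pElem[i].pUsrp);
	return RS_RET_OK;
}

int main(void)
{
	batch_obj_t elems[2] = { { "msg one" }, { "msg two" } };
	batch_t batch = { 2, elems };
	return exampleConsumer(NULL, &batch);
}

The practical effect of the interface change is that one consumer call can now work off a whole dequeued batch rather than being invoked once per message.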
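
The new CORE_FEATURE_* macros added to rsyslog.h are deliberately powers of two so that several feature queries can be OR-ed into one request. A tiny stand-alone illustration of that bit-flag convention follows; only CORE_FEATURE_BATCHING comes from the patch, the second constant and the "provided" value are invented for the example.

#include <stdio.h>

#define CORE_FEATURE_BATCHING 1
#define CORE_FEATURE_EXAMPLE  2   /* hypothetical second feature, illustration only */

int main(void)
{
	unsigned requested = CORE_FEATURE_BATCHING | CORE_FEATURE_EXAMPLE;
	unsigned provided  = CORE_FEATURE_BATCHING; /* pretend this is what the core reports */

	if((requested & provided) == requested)
		printf("all requested features present\n");
	else
		printf("missing feature bits: 0x%x\n", requested & ~provided);
	return 0;
}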
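
Finally, wtpStartWrkr() now creates worker threads with PTHREAD_CREATE_DETACHED, and the pthread_join() call in wti.c is commented out accordingly. The stand-alone sketch below (not rsyslog code) shows the general POSIX pattern; the sleep() is only a crude stand-in for proper synchronization, whereas the runtime itself signals a condition variable (condExitDone) when a worker has finished.

/* build with: cc -pthread detached.c */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static void *worker(void *arg)
{
	(void) arg;
	printf("detached worker running\n");
	return NULL;
}

int main(void)
{
	pthread_t tid;
	pthread_attr_t attr;

	pthread_attr_init(&attr);
	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
	if(pthread_create(&tid, &attr, worker, NULL) != 0) {
		fprintf(stderr, "pthread_create failed\n");
		return 1;
	}
	pthread_attr_destroy(&attr); /* the attribute object can be reused or freed right away */

	/* no pthread_join(): a detached thread's resources are reclaimed
	 * automatically when it terminates, and joining it is not permitted.
	 */
	sleep(1); /* crude wait so the worker gets to run before the demo exits */
	return 0;
}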