Diffstat (limited to 'runtime')
 -rw-r--r--  runtime/Makefile.am        |    1
 -rw-r--r--  runtime/apc.c              |   17
 -rw-r--r--  runtime/atomic.h           |    3
 -rw-r--r--  runtime/batch.h            |   72
 -rw-r--r--  runtime/conf.c             |   20
 -rw-r--r--  runtime/conf.h             |    6
 -rw-r--r--  runtime/debug.c            |   65
 -rw-r--r--  runtime/glbl.c             |   27
 -rw-r--r--  runtime/glbl.h             |    8
 -rw-r--r--  runtime/module-template.h  |   11
 -rw-r--r--  runtime/modules.c          |   64
 -rw-r--r--  runtime/modules.h          |    2
 -rw-r--r--  runtime/msg.c              |   49
 -rw-r--r--  runtime/msg.h              |    2
 -rw-r--r--  runtime/queue.c            | 1695
 -rw-r--r--  runtime/queue.h            |   58
 -rw-r--r--  runtime/rsyslog.h          |   35
 -rw-r--r--  runtime/ruleset.c          |    9
 -rw-r--r--  runtime/srUtils.h          |   17
 -rw-r--r--  runtime/stream.c           |   43
 -rw-r--r--  runtime/stream.h           |    3
 -rw-r--r--  runtime/syslogd-types.h    |    3
 -rw-r--r--  runtime/wti.c              |  348
 -rw-r--r--  runtime/wti.h              |   27
 -rw-r--r--  runtime/wtp.c              |  350
 -rw-r--r--  runtime/wtp.h              |   45
26 files changed, 1571 insertions(+), 1409 deletions(-)
diff --git a/runtime/Makefile.am b/runtime/Makefile.am index 14abe722..caf7c5ca 100644 --- a/runtime/Makefile.am +++ b/runtime/Makefile.am @@ -9,6 +9,7 @@ librsyslog_la_SOURCES = \ rsyslog.h \ unicode-helper.h \ atomic.h \ + batch.h \ syslogd-types.h \ module-template.h \ obj-types.h \ diff --git a/runtime/apc.c b/runtime/apc.c index bc330e39..c2f61266 100644 --- a/runtime/apc.c +++ b/runtime/apc.c @@ -249,12 +249,11 @@ execScheduled(void) apc_list_t *pExecList; apc_list_t *pCurr; apc_list_t *pNext; - DEFVARS_mutexProtection_uncond; DEFiRet; - BEGIN_MTX_PROTECTED_OPERATIONS_UNCOND(&listMutex); + d_pthread_mutex_lock(&listMutex); iRet = unlistCurrent(&pExecList); - END_MTX_PROTECTED_OPERATIONS_UNCOND(&listMutex); + d_pthread_mutex_unlock(&listMutex); CHKiRet(iRet); if(pExecList != NULL) { @@ -290,14 +289,12 @@ ENDobjConstruct(apc) static rsRetVal apcConstructFinalize(apc_t *pThis, apc_id_t *pID) { - DEFVARS_mutexProtection_uncond; DEFiRet; ISOBJ_TYPE_assert(pThis, apc); assert(pID != NULL); - BEGIN_MTX_PROTECTED_OPERATIONS_UNCOND(&listMutex); + d_pthread_mutex_lock(&listMutex); insertApc(pThis, pID); - END_MTX_PROTECTED_OPERATIONS_UNCOND(&listMutex); -RUNLOG_STR("apcConstructFinalize post mutex unlock\n"); + d_pthread_mutex_unlock(&listMutex); RETiRet; } @@ -333,12 +330,10 @@ SetParam2(apc_t *pThis, void *param2) static rsRetVal CancelApc(apc_id_t id) { - DEFVARS_mutexProtection_uncond; - BEGINfunc - BEGIN_MTX_PROTECTED_OPERATIONS_UNCOND(&listMutex); + d_pthread_mutex_lock(&listMutex); deleteApc(id); - END_MTX_PROTECTED_OPERATIONS_UNCOND(&listMutex); + d_pthread_mutex_unlock(&listMutex); ENDfunc return RS_RET_OK; } diff --git a/runtime/atomic.h b/runtime/atomic.h index d5aaf56b..b507b769 100644 --- a/runtime/atomic.h +++ b/runtime/atomic.h @@ -41,6 +41,8 @@ * They simply came in too late. -- rgerhards, 2008-04-02 */ #ifdef HAVE_ATOMIC_BUILTINS +# define ATOMIC_SUB(data, val) __sync_fetch_and_sub(&(data), val) +# define ATOMIC_ADD(data, val) __sync_fetch_and_add(&(data), val) # define ATOMIC_INC(data) ((void) __sync_fetch_and_add(&(data), 1)) # define ATOMIC_INC_AND_FETCH(data) __sync_fetch_and_add(&(data), 1) # define ATOMIC_DEC(data) ((void) __sync_sub_and_fetch(&(data), 1)) @@ -49,7 +51,6 @@ # define ATOMIC_STORE_1_TO_32BIT(data) __sync_lock_test_and_set(&(data), 1) # define ATOMIC_STORE_0_TO_INT(data) __sync_fetch_and_and(&(data), 0) # define ATOMIC_STORE_1_TO_INT(data) __sync_fetch_and_or(&(data), 1) -# define ATOMIC_STORE_INT_TO_INT(data, val) __sync_fetch_and_or(&(data), (val)) # define ATOMIC_CAS(data, oldVal, newVal) __sync_bool_compare_and_swap(&(data), (oldVal), (newVal)); # define ATOMIC_CAS_VAL(data, oldVal, newVal) __sync_val_compare_and_swap(&(data), (oldVal), (newVal)); #else diff --git a/runtime/batch.h b/runtime/batch.h new file mode 100644 index 00000000..031718a7 --- /dev/null +++ b/runtime/batch.h @@ -0,0 +1,72 @@ +/* Definition of the batch_t data structure. + * I am not sure yet if this will become a full-blown object. For now, this header just + * includes the object definition and is not accompanied by code. + * + * Copyright 2009 by Rainer Gerhards and Adiscon GmbH. + * + * This file is part of the rsyslog runtime library. + * + * The rsyslog runtime library is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * The rsyslog runtime library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with the rsyslog runtime library. If not, see <http://www.gnu.org/licenses/>. + * + * A copy of the GPL can be found in the file "COPYING" in this distribution. + * A copy of the LGPL can be found in the file "COPYING.LESSER" in this distribution. + */ + +#ifndef BATCH_H_INCLUDED +#define BATCH_H_INCLUDED + +/* enum for batch states. Actually, we violate a layer here, in that we assume that a batch is used + * for action processing. So far, this seems acceptable, the status is simply ignored inside the + * main message queue. But over time, it could potentially be useful to split the two. + * rgerhad, 2009-05-12 + */ +typedef enum { + BATCH_STATE_RDY = 0, /* object ready for processing */ + BATCH_STATE_BAD = 1, /* unrecoverable failure while processing, do NOT resubmit to same action */ + BATCH_STATE_SUB = 2, /* message submitted for processing, outcome yet unkonwn */ + BATCH_STATE_COMM = 3, /* message successfully commited */ + BATCH_STATE_DISC = 4, /* discarded - processed OK, but do not submit to any other action */ +} batch_state_t; + + +/* an object inside a batch, including any information (state!) needed for it to "life". + */ +struct batch_obj_s { + obj_t *pUsrp; /* pointer to user object (most often message) */ + batch_state_t state; /* associated state */ +}; + +/* the batch + * This object is used to dequeue multiple user pointers which are than handed over + * to processing. The size of elements is fixed after queue creation, but may be + * modified by config variables (better said: queue properties). + * Note that a "user pointer" in rsyslog context so far always is a message + * object. We stick to the more generic term because queues may potentially hold + * other types of objects, too. + * rgerhards, 2009-05-12 + * Note that nElem is not necessarily equal to nElemDeq. This is the case when we + * discard some elements (because of configuration) during dequeue processing. As + * all Elements are only deleted when the batch is processed, we can not immediately + * delete them. So we need to keep their number that we can delete them when the batch + * is completed (else, the whole process does not work correctly). + */ +struct batch_s { + int nElem; /* actual number of element in this entry */ + int nElemDeq; /* actual number of elements dequeued (and thus to be deleted) - see comment above! */ + int iDoneUpTo; /* all messages below this index have state other than RDY */ + qDeqID deqID; /* ID of dequeue operation that generated this batch */ + batch_obj_t *pElem; /* batch elements */ +}; + +#endif /* #ifndef BATCH_H_INCLUDED */ diff --git a/runtime/conf.c b/runtime/conf.c index 83ed2e9b..2e37edf2 100644 --- a/runtime/conf.c +++ b/runtime/conf.c @@ -93,7 +93,7 @@ DEFobjCurrIf(net) DEFobjCurrIf(rule) DEFobjCurrIf(ruleset) -static int iNbrActions; /* number of actions the running config has. Needs to be init on ReInitConf() */ +static int iNbrActions = 0; /* number of currently defined actions */ /* The following global variables are used for building * tag and host selector lines during startup and config reload. 
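To make the batch_t layout introduced in batch.h above more concrete, a minimal consumer-side sketch follows. The typedefs are simplified stand-ins for the rsyslog types (obj_t, qDeqID), and processOne() is a hypothetical per-element callback; the state transitions mirror the batch_state_t values defined above. This is not code from the commit.

typedef void obj_t;                 /* stand-in for the rsyslog object base type */
typedef unsigned long long qDeqID;  /* stand-in for the dequeue-ID type */

typedef enum {
	BATCH_STATE_RDY  = 0,
	BATCH_STATE_BAD  = 1,
	BATCH_STATE_SUB  = 2,
	BATCH_STATE_COMM = 3,
	BATCH_STATE_DISC = 4
} batch_state_t;

typedef struct { obj_t *pUsrp; batch_state_t state; } batch_obj_t;
typedef struct { int nElem; int nElemDeq; int iDoneUpTo; qDeqID deqID; batch_obj_t *pElem; } batch_t;

/* hypothetical per-element consumer; returns 0 on success */
static int processOne(obj_t *pUsr) { (void)pUsr; return 0; }

/* submit every still-ready element and record the outcome per element */
static void consumeBatch(batch_t *pBatch)
{
	int i;
	for(i = 0 ; i < pBatch->nElem ; ++i) {
		if(pBatch->pElem[i].state != BATCH_STATE_RDY)
			continue;                       /* e.g. already discarded */
		pBatch->pElem[i].state = BATCH_STATE_SUB;
		pBatch->pElem[i].state = (processOne(pBatch->pElem[i].pUsrp) == 0)
		                         ? BATCH_STATE_COMM : BATCH_STATE_BAD;
	}
}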
@@ -1102,7 +1102,7 @@ static rsRetVal cflineDoAction(uchar **p, action_t **ppAction) dbgprintf("module is incompatible with RepeatedMsgReduction - turned off\n"); pAction->f_ReduceRepeated = 0; } - pAction->bEnabled = 1; /* action is enabled */ + pAction->eState = ACT_STATE_RDY; /* action is enabled */ iNbrActions++; /* one more active action! */ } break; @@ -1204,21 +1204,6 @@ cfline(uchar *line, rule_t **pfCurr) } -/* Reinitialize the configuration subsystem. This is a "work-around" to the fact - * that we do not yet have actual config objects. This method is to be called - * whenever a totally new config is started (which means on startup and HUP). - * Note that it MUST NOT be called for an included config file. - * rgerhards, 2008-07-28 - */ -static rsRetVal -ReInitConf(void) -{ - DEFiRet; - iNbrActions = 0; /* this is what we created the function for ;) - action count is reset */ - RETiRet; -} - - /* return the current number of active actions * rgerhards, 2008-07-28 */ @@ -1252,7 +1237,6 @@ CODESTARTobjQueryInterface(conf) pIf->doIncludeLine = doIncludeLine; pIf->cfline = cfline; pIf->processConfFile = processConfFile; - pIf->ReInitConf = ReInitConf; pIf->GetNbrActActions = GetNbrActActions; finalize_it: diff --git a/runtime/conf.h b/runtime/conf.h index 25b887be..6db1623e 100644 --- a/runtime/conf.h +++ b/runtime/conf.h @@ -37,10 +37,12 @@ BEGINinterface(conf) /* name must also be changed in ENDinterface macro! */ rsRetVal (*doIncludeLine)(uchar **pp, __attribute__((unused)) void* pVal); rsRetVal (*cfline)(uchar *line, rule_t **pfCurr); rsRetVal (*processConfFile)(uchar *pConfFile); - rsRetVal (*ReInitConf)(void); rsRetVal (*GetNbrActActions)(int *); ENDinterface(conf) -#define confCURR_IF_VERSION 2 /* increment whenever you change the interface structure! */ +#define confCURR_IF_VERSION 3 /* increment whenever you change the interface structure! */ +/* in Version 3, entry point "ReInitConf()" was removed, as we do not longer need + * to support restart-type HUP -- rgerhards, 2009-07-15 + */ /* prototypes */ diff --git a/runtime/debug.c b/runtime/debug.c index 1b592ef3..959d56a3 100644 --- a/runtime/debug.c +++ b/runtime/debug.c @@ -830,13 +830,12 @@ sigsegvHdlr(int signum) abort(); } -#if 1 -#pragma GCC diagnostic ignored "-Wempty-body" -/* write the debug message. This is a helper to dbgprintf and dbgoprint which - * contains common code. added 2008-09-26 rgerhards +/* actually write the debug message. This is a separate fuction because the cleanup_push/_pop + * interface otherwise is unsafe to use (generates compiler warnings at least). + * 2009-05-20 rgerhards */ -static void -dbgprint(obj_t *pObj, char *pszMsg, size_t lenMsg) +static inline void +do_dbgprint(uchar *pszObjName, char *pszMsg, size_t lenMsg) { static pthread_t ptLastThrdID = 0; static int bWasNL = 0; @@ -844,20 +843,6 @@ dbgprint(obj_t *pObj, char *pszMsg, size_t lenMsg) char pszWriteBuf[32*1024]; size_t lenWriteBuf; struct timespec t; - uchar *pszObjName = NULL; - - /* we must get the object name before we lock the mutex, because the object - * potentially calls back into us. If we locked the mutex, we would deadlock - * ourselfs. On the other hand, the GetName call needs not to be protected, as - * this thread has a valid reference. If such an object is deleted by another - * thread, we are in much more trouble than just for dbgprint(). 
-- rgerhards, 2008-09-26 - */ - if(pObj != NULL) { - pszObjName = obj.GetName(pObj); - } - - pthread_mutex_lock(&mutdbgprint); - pthread_cleanup_push(dbgMutexCancelCleanupHdlr, &mutdbgprint); /* The bWasNL handler does not really work. It works if no thread * switching occurs during non-NL messages. Else, things are messed @@ -905,11 +890,35 @@ dbgprint(obj_t *pObj, char *pszMsg, size_t lenMsg) if(altdbg != -1) write(altdbg, pszMsg, lenMsg); bWasNL = (pszMsg[lenMsg - 1] == '\n') ? 1 : 0; +} + +#pragma GCC diagnostic ignored "-Wempty-body" +/* write the debug message. This is a helper to dbgprintf and dbgoprint which + * contains common code. added 2008-09-26 rgerhards + */ +static void +dbgprint(obj_t *pObj, char *pszMsg, size_t lenMsg) +{ + uchar *pszObjName = NULL; + + /* we must get the object name before we lock the mutex, because the object + * potentially calls back into us. If we locked the mutex, we would deadlock + * ourselfs. On the other hand, the GetName call needs not to be protected, as + * this thread has a valid reference. If such an object is deleted by another + * thread, we are in much more trouble than just for dbgprint(). -- rgerhards, 2008-09-26 + */ + if(pObj != NULL) { + pszObjName = obj.GetName(pObj); + } + + pthread_mutex_lock(&mutdbgprint); + pthread_cleanup_push(dbgMutexCancelCleanupHdlr, &mutdbgprint); + + do_dbgprint(pszObjName, pszMsg, lenMsg); pthread_cleanup_pop(1); } #pragma GCC diagnostic warning "-Wempty-body" -#endif /* print some debug output when an object is given * This is mostly a copy of dbgprintf, but I do not know how to combine it @@ -1052,7 +1061,9 @@ int dbgEntrFunc(dbgFuncDB_t **ppFuncDB, const char *file, const char *func, int /* when we reach this point, we have a fully-initialized FuncDB! */ ATOMIC_INC(pFuncDB->nTimesCalled); if(bLogFuncFlow && dbgPrintNameIsInList((const uchar*)pFuncDB->file, printNameFileRoot)) - dbgprintf("%s:%d: %s: enter\n", pFuncDB->file, pFuncDB->line, pFuncDB->func); + if(strcmp(pFuncDB->file, "stringbuf.c")) { /* TODO: make configurable */ + dbgprintf("%s:%d: %s: enter\n", pFuncDB->file, pFuncDB->line, pFuncDB->func); + } if(pThrd->stackPtr >= (int) (sizeof(pThrd->callStack) / sizeof(dbgFuncDB_t*))) { dbgprintf("%s:%d: %s: debug module: call stack for this thread full, suspending call tracking\n", pFuncDB->file, pFuncDB->line, pFuncDB->func); @@ -1082,10 +1093,12 @@ void dbgExitFunc(dbgFuncDB_t *pFuncDB, int iStackPtrRestore, int iRet) dbgFuncDBPrintActiveMutexes(pFuncDB, "WARNING: mutex still owned by us as we exit function, mutex: ", pthread_self()); if(bLogFuncFlow && dbgPrintNameIsInList((const uchar*)pFuncDB->file, printNameFileRoot)) { - if(iRet == RS_RET_NO_IRET) - dbgprintf("%s:%d: %s: exit: (no iRet)\n", pFuncDB->file, pFuncDB->line, pFuncDB->func); - else - dbgprintf("%s:%d: %s: exit: %d\n", pFuncDB->file, pFuncDB->line, pFuncDB->func, iRet); + if(strcmp(pFuncDB->file, "stringbuf.c")) { /* TODO: make configurable */ + if(iRet == RS_RET_NO_IRET) + dbgprintf("%s:%d: %s: exit: (no iRet)\n", pFuncDB->file, pFuncDB->line, pFuncDB->func); + else + dbgprintf("%s:%d: %s: exit: %d\n", pFuncDB->file, pFuncDB->line, pFuncDB->func, iRet); + } } pThrd->stackPtr = iStackPtrRestore; if(pThrd->stackPtr < 0) { diff --git a/runtime/glbl.c b/runtime/glbl.c index 7fa61963..f27b8e73 100644 --- a/runtime/glbl.c +++ b/runtime/glbl.c @@ -39,6 +39,7 @@ #include "cfsysline.h" #include "glbl.h" #include "prop.h" +#include "atomic.h" /* some defaults */ #ifndef DFLT_NETSTRM_DRVR @@ -55,7 +56,6 @@ DEFobjCurrIf(prop) */ static uchar 
*pszWorkDir = NULL; static int bOptimizeUniProc = 1; /* enable uniprocessor optimizations */ -static int bHUPisRestart = 0; /* should SIGHUP cause a full system restart? */ static int bPreserveFQDN = 0; /* should FQDNs always be preserved? */ static int iMaxLine = 2048; /* maximum length of a syslog message */ static int iDefPFFamily = PF_UNSPEC; /* protocol family (IPv4, IPv6 or both) */ @@ -72,6 +72,7 @@ static uchar *pszDfltNetstrmDrvr = NULL; /* module name of default netstream dri static uchar *pszDfltNetstrmDrvrCAF = NULL; /* default CA file for the netstrm driver */ static uchar *pszDfltNetstrmDrvrKeyFile = NULL; /* default key file for the netstrm driver (server) */ static uchar *pszDfltNetstrmDrvrCertFile = NULL; /* default cert file for the netstrm driver (server) */ +static int bTerminateInputs = 0; /* global switch that inputs shall terminate ASAP (1=> terminate) */ /* define a macro for the simple properties' set and get functions @@ -95,7 +96,6 @@ static dataType Get##nameFunc(void) \ SIMP_PROP(OptimizeUniProc, bOptimizeUniProc, int) SIMP_PROP(PreserveFQDN, bPreserveFQDN, int) -SIMP_PROP(HUPisRestart, bHUPisRestart, int) SIMP_PROP(MaxLine, iMaxLine, int) SIMP_PROP(DefPFFamily, iDefPFFamily, int) /* note that in the future we may check the family argument */ SIMP_PROP(DropMalPTRMsgs, bDropMalPTRMsgs, int) @@ -117,6 +117,24 @@ SIMP_PROP_SET(DfltNetstrmDrvrCertFile, pszDfltNetstrmDrvrCertFile, uchar*) /* TO #undef SIMP_PROP_GET +/* return global input termination status + * rgerhards, 2009-07-20 + */ +static int GetGlobalInputTermState(void) +{ + return ATOMIC_FETCH_32BIT(bTerminateInputs); +} + + +/* set global termiantion state to "terminate". Note that this is a + * "once in a lifetime" action which can not be undone. -- gerhards, 2009-07-20 + */ +static void SetGlobalInputTermination(void) +{ + ATOMIC_STORE_1_TO_INT(bTerminateInputs); +} + + /* return our local hostname. 
if it is not set, "[localhost]" is returned */ static uchar* @@ -241,13 +259,14 @@ CODESTARTobjQueryInterface(glbl) pIf->GetWorkDir = GetWorkDir; pIf->GenerateLocalHostNameProperty = GenerateLocalHostNameProperty; pIf->GetLocalHostNameProp = GetLocalHostNameProp; + pIf->SetGlobalInputTermination = SetGlobalInputTermination; + pIf->GetGlobalInputTermState = GetGlobalInputTermState; #define SIMP_PROP(name) \ pIf->Get##name = Get##name; \ pIf->Set##name = Set##name; SIMP_PROP(MaxLine); SIMP_PROP(OptimizeUniProc); SIMP_PROP(PreserveFQDN); - SIMP_PROP(HUPisRestart); SIMP_PROP(DefPFFamily); SIMP_PROP(DropMalPTRMsgs); SIMP_PROP(Option_DisallowWarning); @@ -293,7 +312,6 @@ static rsRetVal resetConfigVariables(uchar __attribute__((unused)) *pp, void __a } bDropMalPTRMsgs = 0; bOptimizeUniProc = 1; - bHUPisRestart = 0; bPreserveFQDN = 0; return RS_RET_OK; } @@ -316,7 +334,6 @@ BEGINAbstractObjClassInit(glbl, 1, OBJ_IS_CORE_MODULE) /* class, version */ CHKiRet(regCfSysLineHdlr((uchar *)"defaultnetstreamdriverkeyfile", 0, eCmdHdlrGetWord, NULL, &pszDfltNetstrmDrvrKeyFile, NULL)); CHKiRet(regCfSysLineHdlr((uchar *)"defaultnetstreamdrivercertfile", 0, eCmdHdlrGetWord, NULL, &pszDfltNetstrmDrvrCertFile, NULL)); CHKiRet(regCfSysLineHdlr((uchar *)"optimizeforuniprocessor", 0, eCmdHdlrBinary, NULL, &bOptimizeUniProc, NULL)); - CHKiRet(regCfSysLineHdlr((uchar *)"hupisrestart", 0, eCmdHdlrBinary, NULL, &bHUPisRestart, NULL)); CHKiRet(regCfSysLineHdlr((uchar *)"preservefqdn", 0, eCmdHdlrBinary, NULL, &bPreserveFQDN, NULL)); CHKiRet(regCfSysLineHdlr((uchar *)"resetconfigvariables", 1, eCmdHdlrCustomHandler, resetConfigVariables, NULL, NULL)); ENDObjClassInit(glbl) diff --git a/runtime/glbl.h b/runtime/glbl.h index dcfb6d5f..0d0c8210 100644 --- a/runtime/glbl.h +++ b/runtime/glbl.h @@ -8,7 +8,7 @@ * Please note that there currently is no glbl.c file as we do not yet * have any implementations. * - * Copyright 2008 Rainer Gerhards and Adiscon GmbH. + * Copyright 2008, 2009 Rainer Gerhards and Adiscon GmbH. * * This file is part of the rsyslog runtime library. * @@ -44,7 +44,6 @@ BEGINinterface(glbl) /* name must also be changed in ENDinterface macro! */ rsRetVal (*Set##name)(dataType); SIMP_PROP(MaxLine, int) SIMP_PROP(OptimizeUniProc, int) - SIMP_PROP(HUPisRestart, int) SIMP_PROP(PreserveFQDN, int) SIMP_PROP(DefPFFamily, int) SIMP_PROP(DropMalPTRMsgs, int) @@ -62,9 +61,12 @@ BEGINinterface(glbl) /* name must also be changed in ENDinterface macro! */ /* added v3, 2009-06-30 */ rsRetVal (*GenerateLocalHostNameProperty)(void); prop_t* (*GetLocalHostNameProp)(void); + /* added v4, 2009-07-20 */ + int (*GetGlobalInputTermState)(void); + void (*SetGlobalInputTermination)(void); #undef SIMP_PROP ENDinterface(glbl) -#define glblCURR_IF_VERSION 3 /* increment whenever you change the interface structure! */ +#define glblCURR_IF_VERSION 4 /* increment whenever you change the interface structure! */ /* version 2 had PreserveFQDN added - rgerhards, 2008-12-08 */ /* the remaining prototypes */ diff --git a/runtime/module-template.h b/runtime/module-template.h index 3e963199..d49da2c9 100644 --- a/runtime/module-template.h +++ b/runtime/module-template.h @@ -368,6 +368,17 @@ static rsRetVal queryEtryPt(uchar *name, rsRetVal (**pEtryPoint)())\ *pEtryPoint = endTransaction;\ } + +/* the following definition is a queryEtryPt block that must be added + * if a non-output module supports "isCompatibleWithFeature". 
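As a usage sketch for the GetGlobalInputTermState()/SetGlobalInputTermination() entry points added to glbl above (not code from this commit): an input module can poll the termination flag instead of relying on thread cancellation. ATOMIC_FETCH_32BIT is not shown in this hunk, so the GCC __sync builtins are used directly here as a plausible equivalent.

static int bTerminateInputs = 0;    /* stand-in for the new glbl flag */

static int  GetGlobalInputTermState(void) { return __sync_fetch_and_add(&bTerminateInputs, 0); }
static void SetGlobalInputTermination(void) { (void) __sync_fetch_and_or(&bTerminateInputs, 1); }

/* hypothetical input-module run loop: check the flag on every iteration and
 * use short I/O timeouts so that it is re-checked regularly
 */
static void runInput(void)
{
	while(GetGlobalInputTermState() == 0) {
		/* read one message with a short timeout, then submit it */
	}
	/* clean up and return; no pthread_cancel() needed */
}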
+ * rgerhards, 2009-07-20 + */ +#define CODEqueryEtryPt_IsCompatibleWithFeature_IF_OMOD_QUERIES \ + else if(!strcmp((char*) name, "isCompatibleWithFeature")) {\ + *pEtryPoint = isCompatibleWithFeature;\ + } + + /* the following definition is the standard block for queryEtryPt for INPUT * modules. This can be used if no specific handling (e.g. to cover version * differences) is needed. diff --git a/runtime/modules.c b/runtime/modules.c index 871f356a..bdb15e7f 100644 --- a/runtime/modules.c +++ b/runtime/modules.c @@ -77,6 +77,27 @@ static modInfo_t *pLoadedModulesLast = NULL; /* tail-pointer */ uchar *pModDir = NULL; /* read-only after startup */ +/* we provide a set of dummy functions for modules that do not support the + * some interfaces. + * On the commit feature: As the modules do not support it, they commit each message they + * receive, and as such the dummies can always return RS_RET_OK without causing + * harm. This simplifies things as in action processing we do not need to check + * if the transactional entry points exist. + */ +static rsRetVal dummyBeginTransaction() +{ + return RS_RET_OK; +} +static rsRetVal dummyEndTransaction() +{ + return RS_RET_OK; +} +static rsRetVal dummyIsCompatibleWithFeature() +{ +dbgprintf("XXX: dummy isCompatibleWithFeature called!\n"); + return RS_RET_INCOMPATIBLE; +} + #ifdef DEBUG /* we add some home-grown support to track our users (and detect who does not free us). In * the long term, this should probably be migrated into debug.c (TODO). -- rgerhards, 2008-03-11 @@ -216,19 +237,38 @@ static void moduleDestruct(modInfo_t *pThis) } +/* This enables a module to query the core for specific features. + * rgerhards, 2009-04-22 + */ +static rsRetVal queryCoreFeatureSupport(int *pBool, unsigned uFeat) +{ + DEFiRet; + + if((pBool == NULL)) + ABORT_FINALIZE(RS_RET_PARAM_ERROR); + + *pBool = (uFeat & CORE_FEATURE_BATCHING) ? 1 : 0; + +finalize_it: + RETiRet; +} + + /* The following function is the queryEntryPoint for host-based entry points. * Modules may call it to get access to core interface functions. Please note * that utility functions can be accessed via shared libraries - at least this * is my current shool of thinking. * Please note that the implementation as a query interface allows to take * care of plug-in interface version differences. -- rgerhards, 2007-07-31 + * ... but often it better not to use a new interface. So we now add core + * functions here that a plugin may request. 
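A sketch of how a plugin might use the new queryCoreFeatureSupport() helper shown above, which is exposed to plugins through queryHostEtryPt() in the following hunk. The function-pointer signatures are simplified, 0 stands in for RS_RET_OK, and the CORE_FEATURE_BATCHING value is a placeholder; its real definition is not part of this hunk.

#ifndef CORE_FEATURE_BATCHING
#define CORE_FEATURE_BATCHING 1     /* placeholder bit; the real value lives in rsyslog.h */
#endif

typedef int (*queryCoreFeature_fn)(int *pBool, unsigned uFeat);

/* returns 1 if the core reports batching support, 0 otherwise */
static int coreSupportsBatching(int (*pHostQueryEtryPt)(unsigned char *name, void **ppEtry))
{
	queryCoreFeature_fn qcf = NULL;
	int bSupported = 0;

	if(pHostQueryEtryPt((unsigned char*)"queryCoreFeatureSupport", (void**)&qcf) == 0
	   && qcf != NULL) {
		qcf(&bSupported, CORE_FEATURE_BATCHING);
	}
	return bSupported;
}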
-- rgerhards, 2009-04-22 */ static rsRetVal queryHostEtryPt(uchar *name, rsRetVal (**pEtryPoint)()) { DEFiRet; if((name == NULL) || (pEtryPoint == NULL)) - return RS_RET_PARAM_ERROR; + ABORT_FINALIZE(RS_RET_PARAM_ERROR); if(!strcmp((char*) name, "regCfSysLineHdlr")) { *pEtryPoint = regCfSysLineHdlr; @@ -236,6 +276,8 @@ static rsRetVal queryHostEtryPt(uchar *name, rsRetVal (**pEtryPoint)()) *pEtryPoint = objGetObjInterface; } else if(!strcmp((char*) name, "OMSRgetSupportedTplOpts")) { *pEtryPoint = OMSRgetSupportedTplOpts; + } else if(!strcmp((char*) name, "queryCoreFeatureSupport")) { + *pEtryPoint = queryCoreFeatureSupport; } else { *pEtryPoint = NULL; /* to be on the safe side */ ABORT_FINALIZE(RS_RET_ENTRY_POINT_NOT_FOUND); @@ -392,6 +434,11 @@ doModInit(rsRetVal (*modInit)(int, int*, rsRetVal(**)(), rsRetVal(*)(), modInfo_ */ CHKiRet((*pNew->modQueryEtryPt)((uchar*)"modGetID", &pNew->modGetID)); CHKiRet((*pNew->modQueryEtryPt)((uchar*)"modExit", &pNew->modExit)); + localRet = (*pNew->modQueryEtryPt)((uchar*)"isCompatibleWithFeature", &pNew->isCompatibleWithFeature); + if(localRet == RS_RET_MODULE_ENTRY_POINT_NOT_FOUND) + pNew->isCompatibleWithFeature = dummyIsCompatibleWithFeature; + else if(localRet != RS_RET_OK) + ABORT_FINALIZE(localRet); /* ... and now the module-specific interfaces */ switch(pNew->eType) { @@ -406,12 +453,25 @@ doModInit(rsRetVal (*modInit)(int, int*, rsRetVal(**)(), rsRetVal(*)(), modInfo_ CHKiRet((*pNew->modQueryEtryPt)((uchar*)"dbgPrintInstInfo", &pNew->dbgPrintInstInfo)); CHKiRet((*pNew->modQueryEtryPt)((uchar*)"doAction", &pNew->mod.om.doAction)); CHKiRet((*pNew->modQueryEtryPt)((uchar*)"parseSelectorAct", &pNew->mod.om.parseSelectorAct)); - CHKiRet((*pNew->modQueryEtryPt)((uchar*)"isCompatibleWithFeature", &pNew->isCompatibleWithFeature)); CHKiRet((*pNew->modQueryEtryPt)((uchar*)"tryResume", &pNew->tryResume)); /* try load optional interfaces */ localRet = (*pNew->modQueryEtryPt)((uchar*)"doHUP", &pNew->doHUP); if(localRet != RS_RET_OK && localRet != RS_RET_MODULE_ENTRY_POINT_NOT_FOUND) ABORT_FINALIZE(localRet); + + localRet = (*pNew->modQueryEtryPt)((uchar*)"beginTransaction", &pNew->mod.om.beginTransaction); + if(localRet == RS_RET_MODULE_ENTRY_POINT_NOT_FOUND) + pNew->mod.om.beginTransaction = dummyBeginTransaction; + else if(localRet != RS_RET_OK) + ABORT_FINALIZE(localRet); + + localRet = (*pNew->modQueryEtryPt)((uchar*)"endTransaction", &pNew->mod.om.endTransaction); + if(localRet == RS_RET_MODULE_ENTRY_POINT_NOT_FOUND) { + pNew->mod.om.endTransaction = dummyEndTransaction; + //pNew->mod.om.beginTransaction = dummyEndTransaction; + } else if(localRet != RS_RET_OK) { + ABORT_FINALIZE(localRet); + } break; case eMOD_LIB: break; diff --git a/runtime/modules.h b/runtime/modules.h index 4d874019..71e3199c 100644 --- a/runtime/modules.h +++ b/runtime/modules.h @@ -111,7 +111,9 @@ typedef struct modInfo_s { struct {/* data for output modules */ /* below: perform the configured action */ + rsRetVal (*beginTransaction)(void*); rsRetVal (*doAction)(uchar**, unsigned, void*); + rsRetVal (*endTransaction)(void*); rsRetVal (*parseSelectorAct)(uchar**, void**,omodStringRequest_t**); } om; struct { /* data for library modules */ diff --git a/runtime/msg.c b/runtime/msg.c index 208ea77a..d28ee350 100644 --- a/runtime/msg.c +++ b/runtime/msg.c @@ -1136,15 +1136,21 @@ char *getProtocolVersionString(msg_t *pM) } -static char *getRawMsg(msg_t *pM) +static inline void +getRawMsg(msg_t *pM, uchar **pBuf, int *piLen) { - if(pM == NULL) - return ""; - else - if(pM->pszRawMsg 
== NULL) - return ""; - else - return (char*)pM->pszRawMsg; + if(pM == NULL) { + *pBuf= UCHAR_CONSTANT(""); + *piLen = 0; + } else { + if(pM->pszRawMsg == NULL) { + *pBuf= UCHAR_CONSTANT(""); + *piLen = 0; + } else { + *pBuf = pM->pszRawMsg; + *piLen = pM->iLenRawMsg; + } + } } @@ -1171,7 +1177,7 @@ uchar *getMSG(msg_t *pM) if(pM == NULL) ret = UCHAR_CONSTANT(""); else { - if(pM->offMSG == -1) + if(pM->iLenMSG == 0) ret = UCHAR_CONSTANT(""); else ret = pM->pszRawMsg + pM->offMSG; @@ -1762,10 +1768,10 @@ int getProgramNameLen(msg_t *pM, bool bLockMutex) /* get the "programname" as sz string * rgerhards, 2005-10-19 */ -char *getProgramName(msg_t *pM, bool bLockMutex) +uchar *getProgramName(msg_t *pM, bool bLockMutex) { prepareProgramName(pM, bLockMutex); - return (pM->pCSProgName == NULL) ? "" : (char*) rsCStrGetSzStrNoNULL(pM->pCSProgName); + return (pM->pCSProgName == NULL) ? UCHAR_CONSTANT("") : rsCStrGetSzStrNoNULL(pM->pCSProgName); } @@ -1782,7 +1788,7 @@ static void tryEmulateAPPNAME(msg_t *pM) if(getProtocolVersion(pM) == 0) { /* only then it makes sense to emulate */ - MsgSetAPPNAME(pM, getProgramName(pM, MUTEX_ALREADY_LOCKED)); + MsgSetAPPNAME(pM, (char*)getProgramName(pM, MUTEX_ALREADY_LOCKED)); } } @@ -1947,12 +1953,20 @@ void MsgSetHOSTNAME(msg_t *pThis, uchar* pszHOSTNAME, int lenHOSTNAME) /* set the offset of the MSG part into the raw msg buffer + * Note that the offset may be higher than the length of the raw message + * (exactly by one). This can happen if we have a message that does not + * contain any MSG part. */ void MsgSetMSGoffs(msg_t *pMsg, short offs) { ISOBJ_TYPE_assert(pMsg, msg); - pMsg->iLenMSG = pMsg->iLenRawMsg - offs; pMsg->offMSG = offs; + if(offs > pMsg->iLenRawMsg) { + assert(offs - 1 == pMsg->iLenRawMsg); + pMsg->iLenMSG = 0; + } else { + pMsg->iLenMSG = pMsg->iLenRawMsg - offs; + } } @@ -1986,7 +2000,8 @@ rsRetVal MsgReplaceMSG(msg_t *pThis, uchar* pszMSG, int lenMSG) pThis->pszRawMsg = bufNew; } - memcpy(pThis->pszRawMsg + pThis->offMSG, pszMSG, lenMSG); + if(lenMSG > 0) + memcpy(pThis->pszRawMsg + pThis->offMSG, pszMSG, lenMSG); pThis->pszRawMsg[lenNew] = '\0'; /* this also works with truncation! 
*/ pThis->iLenRawMsg = lenNew; pThis->iLenMSG = lenMSG; @@ -2172,12 +2187,13 @@ uchar *MsgGetProp(msg_t *pMsg, struct templateEntry *pTpe, break; case PROP_HOSTNAME: pRes = (uchar*)getHOSTNAME(pMsg); + bufLen = getHOSTNAMELen(pMsg); break; case PROP_SYSLOGTAG: getTAG(pMsg, &pRes, &bufLen); break; case PROP_RAWMSG: - pRes = (uchar*)getRawMsg(pMsg); + getRawMsg(pMsg, &pRes, &bufLen); break; /* enable this, if someone actually uses UxTradMsg, delete after some time has * passed and nobody complained -- rgerhards, 2009-06-16 @@ -2209,6 +2225,7 @@ uchar *MsgGetProp(msg_t *pMsg, struct templateEntry *pTpe, break; case PROP_IUT: pRes = UCHAR_CONSTANT("1"); /* always 1 for syslog messages (a MonitorWare thing;)) */ + bufLen = 1; break; case PROP_SYSLOGFACILITY: pRes = (uchar*)getFacility(pMsg); @@ -2226,7 +2243,7 @@ uchar *MsgGetProp(msg_t *pMsg, struct templateEntry *pTpe, pRes = (uchar*)getTimeGenerated(pMsg, pTpe->data.field.eDateFormat); break; case PROP_PROGRAMNAME: - pRes = (uchar*)getProgramName(pMsg, LOCK_MUTEX); + pRes = getProgramName(pMsg, LOCK_MUTEX); break; case PROP_PROTOCOL_VERSION: pRes = (uchar*)getProtocolVersionString(pMsg); diff --git a/runtime/msg.h b/runtime/msg.h index 3a02365b..b006cbec 100644 --- a/runtime/msg.h +++ b/runtime/msg.h @@ -176,7 +176,7 @@ int getMSGLen(msg_t *pM); char *getHOSTNAME(msg_t *pM); int getHOSTNAMELen(msg_t *pM); -char *getProgramName(msg_t *pM, bool bLockMutex); +uchar *getProgramName(msg_t *pM, bool bLockMutex); int getProgramNameLen(msg_t *pM, bool bLockMutex); uchar *getRcvFrom(msg_t *pM); rsRetVal propNameToID(cstr_t *pCSPropName, propid_t *pPropID); diff --git a/runtime/queue.c b/runtime/queue.c index ddff1bcf..101052a1 100644 --- a/runtime/queue.c +++ b/runtime/queue.c @@ -8,7 +8,11 @@ * (and in the web doc set on http://www.rsyslog.com/doc). Be sure to read it * if you are getting aquainted to the object. * - * Copyright 2008 Rainer Gerhards and Adiscon GmbH. + * NOTE: as of 2009-04-22, I have begin to remove the qqueue* prefix from static + * function names - this makes it really hard to read and does not provide much + * benefit, at least I (now) think so... + * + * Copyright 2008, 2009 Rainer Gerhards and Adiscon GmbH. * * This file is part of the rsyslog runtime library. 
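A small worked example for the MsgSetMSGoffs() change shown above: the offset may legitimately point one byte past the end of the raw message (a message without any MSG part), in which case iLenMSG must become 0. The struct is a toy stand-in for msg_t.

#include <assert.h>

struct toyMsg { int iLenRawMsg; int iLenMSG; short offMSG; };

static void setMSGoffs(struct toyMsg *pMsg, short offs)
{
	pMsg->offMSG = offs;
	if(offs > pMsg->iLenRawMsg) {
		assert(offs - 1 == pMsg->iLenRawMsg);
		pMsg->iLenMSG = 0;      /* header only, no MSG part */
	} else {
		pMsg->iLenMSG = pMsg->iLenRawMsg - offs;
	}
}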
* @@ -51,10 +55,10 @@ #include "wti.h" #include "msg.h" #include "atomic.h" +#include "msg.h" /* TODO: remove once we remove MsgAddRef() call */ #ifdef OS_SOLARIS # include <sched.h> -# define pthread_yield() sched_yield() #endif /* static data */ @@ -63,59 +67,157 @@ DEFobjCurrIf(glbl) DEFobjCurrIf(strm) /* forward-definitions */ -static rsRetVal qqueueChkPersist(qqueue_t *pThis); -static rsRetVal qqueueSetEnqOnly(qqueue_t *pThis, int bEnqOnly, int bLockMutex); -static rsRetVal qqueueRateLimiter(qqueue_t *pThis); +static rsRetVal qqueueChkPersist(qqueue_t *pThis, int nUpdates); +static rsRetVal SetEnqOnly(qqueue_t *pThis, int bEnqOnly, int bLockMutex); +static rsRetVal RateLimiter(qqueue_t *pThis); static int qqueueChkStopWrkrDA(qqueue_t *pThis); +static rsRetVal GetDeqBatchSize(qqueue_t *pThis, int *pVal); static int qqueueIsIdleDA(qqueue_t *pThis); -static rsRetVal qqueueConsumerDA(qqueue_t *pThis, wti_t *pWti, int iCancelStateSave); -static rsRetVal qqueueConsumerCancelCleanup(void *arg1, void *arg2); -static rsRetVal qqueueUngetObj(qqueue_t *pThis, obj_t *pUsr, int bLockMutex); +static rsRetVal ConsumerDA(qqueue_t *pThis, wti_t *pWti); +static rsRetVal batchProcessed(qqueue_t *pThis, wti_t *pWti); /* some constants for queuePersist () */ #define QUEUE_CHECKPOINT 1 #define QUEUE_NO_CHECKPOINT 0 +/*********************************************************************** + * we need a private data structure, the "to-delete" list. As C does + * not provide any partly private data structures, we implement this + * structure right here inside the module. + * Note that this list must always be kept sorted based on a unique + * dequeue ID (which is monotonically increasing). + * rgerhards, 2009-05-18 + ***********************************************************************/ + +/* generate next uniqueue dequeue ID. Note that uniqueness is only required + * on a per-queue basis and while this instance runs. So a stricly monotonically + * increasing counter is sufficient (if enough bits are used). + */ +static inline qDeqID getNextDeqID(qqueue_t *pQueue) +{ + ISOBJ_TYPE_assert(pQueue, qqueue); + return pQueue->deqIDAdd++; +} + + +/* return the top element of the to-delete list or NULL, if the + * list is empty. + */ +static inline toDeleteLst_t *tdlPeek(qqueue_t *pQueue) +{ + ISOBJ_TYPE_assert(pQueue, qqueue); + return pQueue->toDeleteLst; +} + + +/* remove the top element of the to-delete list. Nothing but the + * element itself is destroyed. Must not be called when the list + * is empty. + */ +static inline rsRetVal tdlPop(qqueue_t *pQueue) +{ + toDeleteLst_t *pRemove; + DEFiRet; + + ISOBJ_TYPE_assert(pQueue, qqueue); + assert(pQueue->toDeleteLst != NULL); + + pRemove = pQueue->toDeleteLst; + pQueue->toDeleteLst = pQueue->toDeleteLst->pNext; + free(pRemove); + + RETiRet; +} + + +/* Add a new to-delete list entry. The function allocates the data + * structure, populates it with the values provided and links the new + * element into the correct place inside the list. 
+ */ +static inline rsRetVal tdlAdd(qqueue_t *pQueue, qDeqID deqID, int nElemDeq) +{ + toDeleteLst_t *pNew; + toDeleteLst_t *pPrev; + DEFiRet; + + ISOBJ_TYPE_assert(pQueue, qqueue); + assert(pQueue->toDeleteLst != NULL); + + CHKmalloc(pNew = malloc(sizeof(toDeleteLst_t))); + pNew->deqID = deqID; + pNew->nElemDeq = nElemDeq; + + /* now find right spot */ + for( pPrev = pQueue->toDeleteLst + ; pPrev != NULL && deqID > pPrev->deqID + ; pPrev = pPrev->pNext) { + /*JUST SEARCH*/; + } + + if(pPrev == NULL) { + pNew->pNext = pQueue->toDeleteLst; + pQueue->toDeleteLst = pNew; + } else { + pNew->pNext = pPrev->pNext; + pPrev->pNext = pNew; + } + +finalize_it: + RETiRet; +} + + /* methods */ -/* get the overall queue size, which includes ungotten objects. Must only be called +/* get the physical queue size. Must only be called * while mutex is locked! * rgerhards, 2008-01-29 */ static inline int -qqueueGetOverallQueueSize(qqueue_t *pThis) +getPhysicalQueueSize(qqueue_t *pThis) { -#if 0 /* leave a bit in for debugging -- rgerhards, 2008-01-30 */ -BEGINfunc -dbgoprint((obj_t*) pThis, "queue size: %d (regular %d, ungotten %d)\n", - pThis->iQueueSize + pThis->iUngottenObjs, pThis->iQueueSize, pThis->iUngottenObjs); -ENDfunc -#endif - return pThis->iQueueSize + pThis->iUngottenObjs; + return pThis->iQueueSize; } +/* get the logical queue size (that is store size minus logically dequeued elements). + * Must only be called while mutex is locked! + * rgerhards, 2009-05-19 + */ +static inline int +getLogicalQueueSize(qqueue_t *pThis) +{ + return pThis->iQueueSize - pThis->nLogDeq; +} + + + /* This function drains the queue in cases where this needs to be done. The most probable * reason is a HUP which needs to discard data (because the queue is configured to be lossy). * During a shutdown, this is typically not needed, as the OS frees up ressources and does * this much quicker than when we clean up ourselvs. -- rgerhards, 2008-10-21 * This function returns void, as it makes no sense to communicate an error back, even if * it happens. + * This functions works "around" the regular deque mechanism, because it is only used to + * clean up (in cases where message loss is acceptable). */ static inline void queueDrain(qqueue_t *pThis) { void *pUsr; - ASSERT(pThis != NULL); + BEGINfunc + DBGOPRINT((obj_t*) pThis, "queue (type %d) will lose %d messages, destroying...\n", pThis->qType, pThis->iQueueSize); /* iQueueSize is not decremented by qDel(), so we need to do it ourselves */ while(ATOMIC_DEC_AND_FETCH(pThis->iQueueSize) > 0) { - pThis->qDel(pThis, &pUsr); + pThis->qDeq(pThis, &pUsr); if(pUsr != NULL) { objDestruct(pUsr); } + pThis->qDel(pThis); } + ENDfunc } @@ -138,37 +240,17 @@ static inline rsRetVal qqueueAdviseMaxWorkers(qqueue_t *pThis) /* if we have not yet reached the high water mark, there is no need to start a * worker. 
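A toy illustration of the physical vs. logical queue size split introduced above: nLogDeq counts elements that have been dequeued (handed to a worker) but not yet deleted, so the logical size is iQueueSize - nLogDeq. The field names mirror the queue object, but the struct itself is a stripped-down stand-in.

#include <stdio.h>

struct toyQueue { int iQueueSize; int nLogDeq; };

static int getPhysicalQueueSize(struct toyQueue *q) { return q->iQueueSize; }
static int getLogicalQueueSize(struct toyQueue *q)  { return q->iQueueSize - q->nLogDeq; }

int main(void)
{
	struct toyQueue q = { 100, 0 };
	q.nLogDeq += 16;            /* a worker dequeued a 16-element batch */
	printf("physical=%d logical=%d\n",
	       getPhysicalQueueSize(&q), getLogicalQueueSize(&q));   /* 100 / 84 */
	q.iQueueSize -= 16;         /* batch fully processed: elements deleted... */
	q.nLogDeq    -= 16;         /* ...and no longer counted as logically dequeued */
	printf("physical=%d logical=%d\n",
	       getPhysicalQueueSize(&q), getLogicalQueueSize(&q));   /* 84 / 84 */
	return 0;
}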
-- rgerhards, 2008-01-26 */ - if(qqueueGetOverallQueueSize(pThis) >= pThis->iHighWtrMrk || pThis->bQueueStarted == 0) { + if(getLogicalQueueSize(pThis) >= pThis->iHighWtrMrk || pThis->bQueueStarted == 0) { wtpAdviseMaxWorkers(pThis->pWtpDA, 1); /* disk queues have always one worker */ } + } + /* regular workers always run */ + if(pThis->qType == QUEUETYPE_DISK || pThis->iMinMsgsPerWrkr == 0) { + iMaxWorkers = 1; } else { - if(pThis->qType == QUEUETYPE_DISK || pThis->iMinMsgsPerWrkr == 0) { - iMaxWorkers = 1; - } else { - iMaxWorkers = qqueueGetOverallQueueSize(pThis) / pThis->iMinMsgsPerWrkr + 1; - } - wtpAdviseMaxWorkers(pThis->pWtpReg, iMaxWorkers); /* disk queues have always one worker */ + iMaxWorkers = getLogicalQueueSize(pThis) / pThis->iMinMsgsPerWrkr + 1; } - } - - RETiRet; -} - - -/* wait until we have a fully initialized DA queue. Sometimes, we need to - * sync with it, as we expect it for some function. - * rgerhards, 2008-02-27 - */ -static rsRetVal -qqueueWaitDAModeInitialized(qqueue_t *pThis) -{ - DEFiRet; - - ISOBJ_TYPE_assert(pThis, qqueue); - ASSERT(pThis->bRunsDA); - - while(pThis->bRunsDA != 2) { - d_pthread_cond_wait(&pThis->condDAReady, pThis->mut); + wtpAdviseMaxWorkers(pThis->pWtpReg, iMaxWorkers); /* disk queues have always one worker */ } RETiRet; @@ -186,45 +268,16 @@ qqueueWaitDAModeInitialized(qqueue_t *pThis) * rgerhards, 2008-01-15 */ static rsRetVal -qqueueTurnOffDAMode(qqueue_t *pThis) +TurnOffDAMode(qqueue_t *pThis) { DEFiRet; ISOBJ_TYPE_assert(pThis, qqueue); ASSERT(pThis->bRunsDA); - - /* at this point, we need a fully initialized DA queue. So if it isn't, we finally need - * to wait for its startup... -- rgerhards, 2008-01-25 - */ - qqueueWaitDAModeInitialized(pThis); - - /* if we need to pull any data that we still need from the (child) disk queue, - * now would be the time to do so. At present, we do not need this, but I'd like to - * keep that comment if future need arises. - */ - - /* we need to check if the DA queue is empty because the DA worker may simply have - * terminated do to no new messages arriving. That does not, however, mean that the - * DA queue is empty. If there is still data in that queue, we do nothing and leave - * that for a later incarnation of this function (it will be called multiple times - * during the lifetime of DA-mode, depending on how often the DA worker receives an - * inactivity timeout. -- rgerhards, 2008-01-25 - */ - if(pThis->pqDA->iQueueSize == 0) { + if(getLogicalQueueSize(pThis->pqDA) == 0) { pThis->bRunsDA = 0; /* tell the world we are back in non-DA mode */ - /* we destruct the queue object, which will also shutdown the queue worker. As the queue is empty, - * this will be quick. - */ - qqueueDestruct(&pThis->pqDA); /* and now we are ready to destruct the DA queue */ - dbgoprint((obj_t*) pThis, "disk-assistance has been turned off, disk queue was empty (iRet %d)\n", + DBGOPRINT((obj_t*) pThis, "disk-assistance has been turned off, disk queue was empty (iRet %d)\n", iRet); - /* now we need to check if the regular queue has some messages. This may be the case - * when it is waiting that the high water mark is reached again. If so, we need to start up - * a regular worker. 
-- rgerhards, 2008-01-26 - */ - if(qqueueGetOverallQueueSize(pThis) > 0) { - qqueueAdviseMaxWorkers(pThis); - } } RETiRet; @@ -246,9 +299,9 @@ qqueueChkIsDA(qqueue_t *pThis) ISOBJ_TYPE_assert(pThis, qqueue); if(pThis->pszFilePrefix != NULL) { pThis->bIsDA = 1; - dbgoprint((obj_t*) pThis, "is disk-assisted, disk will be used on demand\n"); + DBGOPRINT((obj_t*) pThis, "is disk-assisted, disk will be used on demand\n"); } else { - dbgoprint((obj_t*) pThis, "is NOT disk-assisted\n"); + DBGOPRINT((obj_t*) pThis, "is NOT disk-assisted\n"); } RETiRet; @@ -267,16 +320,13 @@ qqueueChkIsDA(qqueue_t *pThis) * rgerhards, 2008-01-15 */ static rsRetVal -qqueueStartDA(qqueue_t *pThis) +StartDA(qqueue_t *pThis) { DEFiRet; uchar pszDAQName[128]; ISOBJ_TYPE_assert(pThis, qqueue); - if(pThis->bRunsDA == 2) /* check if already in (fully initialized) DA mode... */ - FINALIZE; /* ... then we are already done! */ - /* create message queue */ CHKiRet(qqueueConstruct(&pThis->pqDA, QUEUETYPE_DISK , 1, 0, pThis->pConsumer)); @@ -298,11 +348,15 @@ qqueueStartDA(qqueue_t *pThis) CHKiRet(qqueueSetbSyncQueueFiles(pThis->pqDA, pThis->bSyncQueueFiles)); CHKiRet(qqueueSettoActShutdown(pThis->pqDA, pThis->toActShutdown)); CHKiRet(qqueueSettoEnq(pThis->pqDA, pThis->toEnq)); - CHKiRet(qqueueSetEnqOnly(pThis->pqDA, pThis->bDAEnqOnly, MUTEX_ALREADY_LOCKED)); + CHKiRet(SetEnqOnly(pThis->pqDA, pThis->bDAEnqOnly, MUTEX_ALREADY_LOCKED)); CHKiRet(qqueueSetiDeqtWinFromHr(pThis->pqDA, pThis->iDeqtWinFromHr)); CHKiRet(qqueueSetiDeqtWinToHr(pThis->pqDA, pThis->iDeqtWinToHr)); CHKiRet(qqueueSetiHighWtrMrk(pThis->pqDA, 0)); CHKiRet(qqueueSetiDiscardMrk(pThis->pqDA, 0)); + + // experimental: XXX + CHKiRet(qqueueSettoWrkShutdown(pThis->pqDA, 0)); + if(pThis->toQShutdown == 0) { CHKiRet(qqueueSettoQShutdown(pThis->pqDA, 0)); /* if the user really wants... */ } else { @@ -318,19 +372,9 @@ qqueueStartDA(qqueue_t *pThis) if(iRet != RS_RET_OK && iRet != RS_RET_FILE_NOT_FOUND) FINALIZE; /* something is wrong */ - /* as we are right now starting DA mode because we are so busy, it is - * extremely unlikely that any regular worker is sleeping on empty queue. HOWEVER, - * we want to be on the safe side, and so we awake anyone that is waiting - * on one. So even if the scheduler plays badly with us, things should be - * quite well. -- rgerhards, 2008-01-15 - */ - wtpWakeupWrkr(pThis->pWtpReg); /* awake all workers, but not ourselves ;) */ - - pThis->bRunsDA = 2; /* we are now in DA mode, but not fully initialized */ - pThis->bChildIsDone = 0;/* set to 1 when child's worker detect queue is finished */ - pthread_cond_broadcast(&pThis->condDAReady); /* signal we are now initialized and ready to go ;) */ + //pthread_cond_broadcast(&pThis->condDAReady); /* signal we are now initialized and ready to go ;) */ - dbgoprint((obj_t*) pThis, "is now running in disk assisted mode, disk queue 0x%lx\n", + DBGOPRINT((obj_t*) pThis, "is now running in disk assisted mode, disk queue 0x%lx\n", qqueueGetID(pThis->pqDA)); finalize_it: @@ -338,7 +382,7 @@ finalize_it: if(pThis->pqDA != NULL) { qqueueDestruct(&pThis->pqDA); } - dbgoprint((obj_t*) pThis, "error %d creating disk queue - giving up.\n", iRet); + DBGOPRINT((obj_t*) pThis, "error %d creating disk queue - giving up.\n", iRet); pThis->bIsDA = 0; } @@ -352,8 +396,8 @@ finalize_it: * If this function fails (should not happen), DA mode is not turned on. 
* rgerhards, 2008-01-16 */ -static inline rsRetVal -qqueueInitDA(qqueue_t *pThis, int bEnqOnly, int bLockMutex) +static rsRetVal +InitDA(qqueue_t *pThis, int bEnqOnly, int bLockMutex) { DEFiRet; DEFVARS_mutexProtection; @@ -366,17 +410,18 @@ qqueueInitDA(qqueue_t *pThis, int bEnqOnly, int bLockMutex) * is intentional. We assume that when we need it once, we may also need it on another * occasion. Ressources used are quite minimal when no worker is running. * rgerhards, 2008-01-24 + * NOTE: this is the DA worker *pool*, not the DA queue! */ if(pThis->pWtpDA == NULL) { - lenBuf = snprintf((char*)pszBuf, sizeof(pszBuf), "%s:DA", obj.GetName((obj_t*) pThis)); + lenBuf = snprintf((char*)pszBuf, sizeof(pszBuf), "%s:DAwpool", obj.GetName((obj_t*) pThis)); CHKiRet(wtpConstruct (&pThis->pWtpDA)); CHKiRet(wtpSetDbgHdr (pThis->pWtpDA, pszBuf, lenBuf)); CHKiRet(wtpSetpfChkStopWrkr (pThis->pWtpDA, (rsRetVal (*)(void *pUsr, int)) qqueueChkStopWrkrDA)); - CHKiRet(wtpSetpfIsIdle (pThis->pWtpDA, (rsRetVal (*)(void *pUsr, int)) qqueueIsIdleDA)); - CHKiRet(wtpSetpfDoWork (pThis->pWtpDA, (rsRetVal (*)(void *pUsr, void *pWti, int)) qqueueConsumerDA)); - CHKiRet(wtpSetpfOnWorkerCancel (pThis->pWtpDA, (rsRetVal (*)(void *pUsr, void*pWti)) qqueueConsumerCancelCleanup)); - CHKiRet(wtpSetpfOnWorkerStartup (pThis->pWtpDA, (rsRetVal (*)(void *pUsr)) qqueueStartDA)); - CHKiRet(wtpSetpfOnWorkerShutdown(pThis->pWtpDA, (rsRetVal (*)(void *pUsr)) qqueueTurnOffDAMode)); + CHKiRet(wtpSetpfGetDeqBatchSize (pThis->pWtpDA, (rsRetVal (*)(void *pUsr, int*)) GetDeqBatchSize)); + CHKiRet(wtpSetpfIsIdle (pThis->pWtpDA, (rsRetVal (*)(void *pUsr, wtp_t*)) qqueueIsIdleDA)); + CHKiRet(wtpSetpfDoWork (pThis->pWtpDA, (rsRetVal (*)(void *pUsr, void *pWti)) ConsumerDA)); + CHKiRet(wtpSetpfObjProcessed (pThis->pWtpDA, (rsRetVal (*)(void *pUsr, wti_t *pWti)) batchProcessed)); + CHKiRet(wtpSetpfOnWorkerShutdown(pThis->pWtpDA, (rsRetVal (*)(void *pUsr)) TurnOffDAMode)); CHKiRet(wtpSetpmutUsr (pThis->pWtpDA, pThis->mut)); CHKiRet(wtpSetpcondBusy (pThis->pWtpDA, &pThis->notEmpty)); CHKiRet(wtpSetiNumWorkerThreads (pThis->pWtpDA, 1)); @@ -387,14 +432,20 @@ qqueueInitDA(qqueue_t *pThis, int bEnqOnly, int bLockMutex) /* if we reach this point, we have a "good" DA worker pool */ /* indicate we now run in DA mode - this is reset by the DA worker if it fails */ - pThis->bRunsDA = 1; pThis->bDAEnqOnly = bEnqOnly; + /* now construct the actual queue (if it does not already exist) */ + if(pThis->pqDA == NULL) { + CHKiRet(StartDA(pThis)); + } + + pThis->bRunsDA = 1; + /* now we must now adivse the wtp that we need one worker. If none is yet active, * that will also start one up. If we forgot that step, everything would be stalled * until the next enqueue request. */ - wtpAdviseMaxWorkers(pThis->pWtpDA, 1); /* DA queues alsways have just one worker max */ + wtpAdviseMaxWorkers(pThis->pWtpDA, 1); /* DA queues always have just one worker max */ finalize_it: END_MTX_PROTECTED_OPERATIONS(pThis->mut); @@ -408,15 +459,15 @@ finalize_it: * complete. 
* rgerhards, 2008-01-14 */ -static inline rsRetVal -qqueueChkStrtDA(qqueue_t *pThis) +static rsRetVal +ChkStrtDA(qqueue_t *pThis) { DEFiRet; ISOBJ_TYPE_assert(pThis, qqueue); /* if we do not hit the high water mark, we have nothing to do */ - if(qqueueGetOverallQueueSize(pThis) != pThis->iHighWtrMrk) + if(getPhysicalQueueSize(pThis) != pThis->iHighWtrMrk) ABORT_FINALIZE(RS_RET_OK); if(pThis->bRunsDA) { @@ -429,16 +480,16 @@ qqueueChkStrtDA(qqueue_t *pThis) * terminated due to the inactivity timeout, thus we need to advise the pool that * we need at least one). */ - dbgoprint((obj_t*) pThis, "%d entries - passed high water mark in DA mode, send notify\n", - qqueueGetOverallQueueSize(pThis)); + DBGOPRINT((obj_t*) pThis, "%d entries - passed high water mark in DA mode, send notify\n", + getPhysicalQueueSize(pThis)); qqueueAdviseMaxWorkers(pThis); } else { /* this is the case when we are currently not running in DA mode. So it is time * to turn it back on. */ - dbgoprint((obj_t*) pThis, "%d entries - passed high water mark for disk-assisted mode, initiating...\n", - qqueueGetOverallQueueSize(pThis)); - qqueueInitDA(pThis, QUEUE_MODE_ENQDEQ, MUTEX_ALREADY_LOCKED); /* initiate DA mode */ + DBGOPRINT((obj_t*) pThis, "%d entries - passed high water mark for disk-assisted mode, initiating...\n", + getPhysicalQueueSize(pThis)); + InitDA(pThis, QUEUE_MODE_ENQDEQ, MUTEX_ALREADY_LOCKED); /* initiate DA mode */ } finalize_it: @@ -469,6 +520,7 @@ static rsRetVal qConstructFixedArray(qqueue_t *pThis) ABORT_FINALIZE(RS_RET_OUT_OF_MEMORY); } + pThis->tVars.farray.deqhead = 0; pThis->tVars.farray.head = 0; pThis->tVars.farray.tail = 0; @@ -486,9 +538,7 @@ static rsRetVal qDestructFixedArray(qqueue_t *pThis) ASSERT(pThis != NULL); queueDrain(pThis); /* discard any remaining queue entries */ - - if(pThis->tVars.farray.pBuf != NULL) - free(pThis->tVars.farray.pBuf); + free(pThis->tVars.farray.pBuf); RETiRet; } @@ -507,76 +557,57 @@ static rsRetVal qAddFixedArray(qqueue_t *pThis, void* in) RETiRet; } -static rsRetVal qDelFixedArray(qqueue_t *pThis, void **out) + +static rsRetVal qDeqFixedArray(qqueue_t *pThis, void **out) { DEFiRet; ASSERT(pThis != NULL); - *out = (void*) pThis->tVars.farray.pBuf[pThis->tVars.farray.head]; + *out = (void*) pThis->tVars.farray.pBuf[pThis->tVars.farray.deqhead]; - pThis->tVars.farray.head++; - if (pThis->tVars.farray.head == pThis->iMaxQueueSize) - pThis->tVars.farray.head = 0; + pThis->tVars.farray.deqhead++; + if (pThis->tVars.farray.deqhead == pThis->iMaxQueueSize) + pThis->tVars.farray.deqhead = 0; RETiRet; } -/* -------------------- linked list -------------------- */ - -/* first some generic functions which are also used for the unget linked list */ - -static inline rsRetVal qqueueAddLinkedList(qLinkedList_t **ppRoot, qLinkedList_t **ppLast, void* pUsr) +static rsRetVal qDelFixedArray(qqueue_t *pThis) { DEFiRet; - qLinkedList_t *pEntry; - - ASSERT(ppRoot != NULL); - ASSERT(ppLast != NULL); - - if((pEntry = (qLinkedList_t*) malloc(sizeof(qLinkedList_t))) == NULL) { - ABORT_FINALIZE(RS_RET_OUT_OF_MEMORY); - } - pEntry->pNext = NULL; - pEntry->pUsr = pUsr; + ASSERT(pThis != NULL); - if(*ppRoot == NULL) { - *ppRoot = *ppLast = pEntry; - } else { - (*ppLast)->pNext = pEntry; - *ppLast = pEntry; - } + pThis->tVars.farray.head++; + if (pThis->tVars.farray.head == pThis->iMaxQueueSize) + pThis->tVars.farray.head = 0; -finalize_it: RETiRet; } -static inline rsRetVal qqueueDelLinkedList(qLinkedList_t **ppRoot, qLinkedList_t **ppLast, obj_t **ppUsr) + +/* reset the logical dequeue 
pointer to the physical dequeue position. + * This is only needed after we cancelled workers (during queue shutdown). + */ +static rsRetVal +qUnDeqAllFixedArray(qqueue_t *pThis) { DEFiRet; - qLinkedList_t *pEntry; - ASSERT(ppRoot != NULL); - ASSERT(ppLast != NULL); - ASSERT(ppUsr != NULL); - ASSERT(*ppRoot != NULL); - - pEntry = *ppRoot; - *ppUsr = pEntry->pUsr; + ISOBJ_TYPE_assert(pThis, qqueue); - if(*ppRoot == *ppLast) { - *ppRoot = NULL; - *ppLast = NULL; - } else { - *ppRoot = pEntry->pNext; - } - free(pEntry); + DBGOPRINT((obj_t*) pThis, "resetting FixedArray deq index to %ld (was %ld), logical dequeue count %d\n", + pThis->tVars.farray.head, pThis->tVars.farray.deqhead, pThis->nLogDeq); + + pThis->tVars.farray.deqhead = pThis->tVars.farray.head; + pThis->nLogDeq = 0; RETiRet; } -/* end generic functions which are also used for the unget linked list */ + +/* -------------------- linked list -------------------- */ static rsRetVal qConstructLinkedList(qqueue_t *pThis) @@ -585,8 +616,9 @@ static rsRetVal qConstructLinkedList(qqueue_t *pThis) ASSERT(pThis != NULL); - pThis->tVars.linklist.pRoot = 0; - pThis->tVars.linklist.pLast = 0; + pThis->tVars.linklist.pDeqRoot = NULL; + pThis->tVars.linklist.pDelRoot = NULL; + pThis->tVars.linklist.pLast = NULL; qqueueChkIsDA(pThis); @@ -609,54 +641,79 @@ static rsRetVal qDestructLinkedList(qqueue_t __attribute__((unused)) *pThis) static rsRetVal qAddLinkedList(qqueue_t *pThis, void* pUsr) { - DEFiRet; - - iRet = qqueueAddLinkedList(&pThis->tVars.linklist.pRoot, &pThis->tVars.linklist.pLast, pUsr); -#if 0 qLinkedList_t *pEntry; + DEFiRet; - ASSERT(pThis != NULL); - if((pEntry = (qLinkedList_t*) malloc(sizeof(qLinkedList_t))) == NULL) { - ABORT_FINALIZE(RS_RET_OUT_OF_MEMORY); - } + CHKmalloc((pEntry = (qLinkedList_t*) malloc(sizeof(qLinkedList_t)))); pEntry->pNext = NULL; pEntry->pUsr = pUsr; - if(pThis->tVars.linklist.pRoot == NULL) { - pThis->tVars.linklist.pRoot = pThis->tVars.linklist.pLast = pEntry; + if(pThis->tVars.linklist.pDelRoot == NULL) { + pThis->tVars.linklist.pDelRoot = pThis->tVars.linklist.pDeqRoot = pThis->tVars.linklist.pLast = pEntry; } else { pThis->tVars.linklist.pLast->pNext = pEntry; pThis->tVars.linklist.pLast = pEntry; } + if(pThis->tVars.linklist.pDeqRoot == NULL) { + pThis->tVars.linklist.pDeqRoot = pEntry; + } + finalize_it: -#endif RETiRet; } -static rsRetVal qDelLinkedList(qqueue_t *pThis, obj_t **ppUsr) + +static rsRetVal qDeqLinkedList(qqueue_t *pThis, obj_t **ppUsr) { - DEFiRet; - iRet = qqueueDelLinkedList(&pThis->tVars.linklist.pRoot, &pThis->tVars.linklist.pLast, ppUsr); -#if 0 qLinkedList_t *pEntry; + DEFiRet; - ASSERT(pThis != NULL); - ASSERT(pThis->tVars.linklist.pRoot != NULL); - - pEntry = pThis->tVars.linklist.pRoot; + pEntry = pThis->tVars.linklist.pDeqRoot; + ISOBJ_TYPE_assert(pEntry->pUsr, msg); *ppUsr = pEntry->pUsr; + pThis->tVars.linklist.pDeqRoot = pEntry->pNext; + + RETiRet; +} + + +static rsRetVal qDelLinkedList(qqueue_t *pThis) +{ + qLinkedList_t *pEntry; + DEFiRet; - if(pThis->tVars.linklist.pRoot == pThis->tVars.linklist.pLast) { - pThis->tVars.linklist.pRoot = NULL; - pThis->tVars.linklist.pLast = NULL; + pEntry = pThis->tVars.linklist.pDelRoot; + + if(pThis->tVars.linklist.pDelRoot == pThis->tVars.linklist.pLast) { + pThis->tVars.linklist.pDelRoot = pThis->tVars.linklist.pDeqRoot = pThis->tVars.linklist.pLast = NULL; } else { - pThis->tVars.linklist.pRoot = pEntry->pNext; + pThis->tVars.linklist.pDelRoot = pEntry->pNext; } + free(pEntry); -#endif + RETiRet; +} + + +/* reset the logical 
dequeue pointer to the physical dequeue position. + * This is only needed after we cancelled workers (during queue shutdown). + */ +static rsRetVal +qUnDeqAllLinkedList(qqueue_t *pThis) +{ + DEFiRet; + + ASSERT(pThis != NULL); + + DBGOPRINT((obj_t*) pThis, "resetting LinkedList deq ptr to %p (was %p), logical dequeue count %d\n", + pThis->tVars.linklist.pDelRoot, pThis->tVars.linklist.pDeqRoot, pThis->nLogDeq); + + pThis->tVars.linklist.pDeqRoot = pThis->tVars.linklist.pDelRoot; + pThis->nLogDeq = 0; + RETiRet; } @@ -700,10 +757,10 @@ qqueueHaveQIF(qqueue_t *pThis) /* check if the file exists */ if(stat((char*) pszQIFNam, &stat_buf) == -1) { if(errno == ENOENT) { - dbgoprint((obj_t*) pThis, "no .qi file found\n"); + DBGOPRINT((obj_t*) pThis, "no .qi file found\n"); ABORT_FINALIZE(RS_RET_FILE_NOT_FOUND); } else { - dbgoprint((obj_t*) pThis, "error %d trying to access .qi file\n", errno); + DBGOPRINT((obj_t*) pThis, "error %d trying to access .qi file\n", errno); ABORT_FINALIZE(RS_RET_IO_ERROR); } } @@ -725,8 +782,6 @@ qqueueTryLoadPersistedInfo(qqueue_t *pThis) uchar pszQIFNam[MAXFNAME]; size_t lenQIFNam; struct stat stat_buf; - int iUngottenObjs; - obj_t *pUsr; ISOBJ_TYPE_assert(pThis, qqueue); @@ -737,10 +792,10 @@ qqueueTryLoadPersistedInfo(qqueue_t *pThis) /* check if the file exists */ if(stat((char*) pszQIFNam, &stat_buf) == -1) { if(errno == ENOENT) { - dbgoprint((obj_t*) pThis, "clean startup, no .qi file found\n"); + DBGOPRINT((obj_t*) pThis, "clean startup, no .qi file found\n"); ABORT_FINALIZE(RS_RET_FILE_NOT_FOUND); } else { - dbgoprint((obj_t*) pThis, "error %d trying to access .qi file\n", errno); + DBGOPRINT((obj_t*) pThis, "error %d trying to access .qi file\n", errno); ABORT_FINALIZE(RS_RET_IO_ERROR); } } @@ -756,25 +811,22 @@ qqueueTryLoadPersistedInfo(qqueue_t *pThis) /* first, we try to read the property bag for ourselfs */ CHKiRet(obj.DeserializePropBag((obj_t*) pThis, psQIF)); - /* then the ungotten object queue */ - iUngottenObjs = pThis->iUngottenObjs; - pThis->iUngottenObjs = 0; /* will be incremented when we add objects! */ - - while(iUngottenObjs > 0) { - /* fill the queue from disk */ - CHKiRet(obj.Deserialize((void*) &pUsr, (uchar*)"msg", psQIF, NULL, NULL)); - qqueueUngetObj(pThis, pUsr, MUTEX_ALREADY_LOCKED); - --iUngottenObjs; /* one less */ - } - - /* and now the stream objects (some order as when persisted!) */ + /* then the stream objects (same order as when persisted!) */ CHKiRet(obj.Deserialize(&pThis->tVars.disk.pWrite, (uchar*) "strm", psQIF, (rsRetVal(*)(obj_t*,void*))qqueueLoadPersStrmInfoFixup, pThis)); - CHKiRet(obj.Deserialize(&pThis->tVars.disk.pRead, (uchar*) "strm", psQIF, + CHKiRet(obj.Deserialize(&pThis->tVars.disk.pReadDel, (uchar*) "strm", psQIF, (rsRetVal(*)(obj_t*,void*))qqueueLoadPersStrmInfoFixup, pThis)); + /* create a duplicate for the read "pointer". + */ + + CHKiRet(strm.Dup(pThis->tVars.disk.pReadDel, &pThis->tVars.disk.pReadDeq)); + CHKiRet(strm.SetbDeleteOnClose(pThis->tVars.disk.pReadDeq, 0)); /* deq must NOT delete the files! */ + CHKiRet(strm.ConstructFinalize(pThis->tVars.disk.pReadDeq)); + CHKiRet(strm.SeekCurrOffs(pThis->tVars.disk.pWrite)); - CHKiRet(strm.SeekCurrOffs(pThis->tVars.disk.pRead)); + CHKiRet(strm.SeekCurrOffs(pThis->tVars.disk.pReadDel)); + CHKiRet(strm.SeekCurrOffs(pThis->tVars.disk.pReadDeq)); /* OK, we could successfully read the file, so we now can request that it be * deleted when we are done with the persisted information. 
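The linked-list changes above separate a dequeue pointer from a delete pointer. Below is a stripped-down model of that invariant, assuming the "delete only after the batch is fully processed" rule described earlier; both helpers assume a non-empty list, as the originals do.

#include <stdlib.h>

typedef struct qLL_s {
	struct qLL_s *pNext;
	void *pUsr;
} qLL_t;

typedef struct {
	qLL_t *pDeqRoot;   /* next element to dequeue (hand to a worker) */
	qLL_t *pDelRoot;   /* next element to delete; lags pDeqRoot */
	qLL_t *pLast;
} toyLinkList_t;

static void *deq(toyLinkList_t *l)          /* logical dequeue */
{
	void *pUsr = l->pDeqRoot->pUsr;
	l->pDeqRoot = l->pDeqRoot->pNext;
	return pUsr;
}

static void del(toyLinkList_t *l)           /* physical delete, after the batch commits */
{
	qLL_t *p = l->pDelRoot;
	if(l->pDelRoot == l->pLast)
		l->pDelRoot = l->pDeqRoot = l->pLast = NULL;
	else
		l->pDelRoot = p->pNext;
	free(p);
}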
@@ -786,7 +838,7 @@ finalize_it: strm.Destruct(&psQIF); if(iRet != RS_RET_OK) { - dbgoprint((obj_t*) pThis, "error %d reading .qi file - can not read persisted info (if any)\n", + DBGOPRINT((obj_t*) pThis, "error %d reading .qi file - can not read persisted info (if any)\n", iRet); } @@ -826,18 +878,26 @@ static rsRetVal qConstructDisk(qqueue_t *pThis) CHKiRet(strm.SetsType(pThis->tVars.disk.pWrite, STREAMTYPE_FILE_CIRCULAR)); CHKiRet(strm.ConstructFinalize(pThis->tVars.disk.pWrite)); - CHKiRet(strm.Construct(&pThis->tVars.disk.pRead)); - CHKiRet(strm.SetbSync(pThis->tVars.disk.pRead, pThis->bSyncQueueFiles)); - CHKiRet(strm.SetbDeleteOnClose(pThis->tVars.disk.pRead, 1)); - CHKiRet(strm.SetDir(pThis->tVars.disk.pRead, glbl.GetWorkDir(), strlen((char*)glbl.GetWorkDir()))); - CHKiRet(strm.SetiMaxFiles(pThis->tVars.disk.pRead, 10000000)); - CHKiRet(strm.SettOperationsMode(pThis->tVars.disk.pRead, STREAMMODE_READ)); - CHKiRet(strm.SetsType(pThis->tVars.disk.pRead, STREAMTYPE_FILE_CIRCULAR)); - CHKiRet(strm.ConstructFinalize(pThis->tVars.disk.pRead)); + CHKiRet(strm.Construct(&pThis->tVars.disk.pReadDeq)); + CHKiRet(strm.SetbDeleteOnClose(pThis->tVars.disk.pReadDeq, 0)); + CHKiRet(strm.SetDir(pThis->tVars.disk.pReadDeq, glbl.GetWorkDir(), strlen((char*)glbl.GetWorkDir()))); + CHKiRet(strm.SetiMaxFiles(pThis->tVars.disk.pReadDeq, 10000000)); + CHKiRet(strm.SettOperationsMode(pThis->tVars.disk.pReadDeq, STREAMMODE_READ)); + CHKiRet(strm.SetsType(pThis->tVars.disk.pReadDeq, STREAMTYPE_FILE_CIRCULAR)); + CHKiRet(strm.ConstructFinalize(pThis->tVars.disk.pReadDeq)); + CHKiRet(strm.Construct(&pThis->tVars.disk.pReadDel)); + CHKiRet(strm.SetbSync(pThis->tVars.disk.pReadDel, pThis->bSyncQueueFiles)); + CHKiRet(strm.SetbDeleteOnClose(pThis->tVars.disk.pReadDel, 1)); + CHKiRet(strm.SetDir(pThis->tVars.disk.pReadDel, glbl.GetWorkDir(), strlen((char*)glbl.GetWorkDir()))); + CHKiRet(strm.SetiMaxFiles(pThis->tVars.disk.pReadDel, 10000000)); + CHKiRet(strm.SettOperationsMode(pThis->tVars.disk.pReadDel, STREAMMODE_READ)); + CHKiRet(strm.SetsType(pThis->tVars.disk.pReadDel, STREAMTYPE_FILE_CIRCULAR)); + CHKiRet(strm.ConstructFinalize(pThis->tVars.disk.pReadDel)); - CHKiRet(strm.SetFName(pThis->tVars.disk.pWrite, pThis->pszFilePrefix, pThis->lenFilePrefix)); - CHKiRet(strm.SetFName(pThis->tVars.disk.pRead, pThis->pszFilePrefix, pThis->lenFilePrefix)); + CHKiRet(strm.SetFName(pThis->tVars.disk.pWrite, pThis->pszFilePrefix, pThis->lenFilePrefix)); + CHKiRet(strm.SetFName(pThis->tVars.disk.pReadDeq, pThis->pszFilePrefix, pThis->lenFilePrefix)); + CHKiRet(strm.SetFName(pThis->tVars.disk.pReadDel, pThis->pszFilePrefix, pThis->lenFilePrefix)); } /* now we set (and overwrite in case of a persisted restart) some parameters which @@ -846,7 +906,8 @@ static rsRetVal qConstructDisk(qqueue_t *pThis) * ability to read existing queue files. 
-- rgerhards, 2008-01-12 */ CHKiRet(strm.SetiMaxFileSize(pThis->tVars.disk.pWrite, pThis->iMaxFileSize)); - CHKiRet(strm.SetiMaxFileSize(pThis->tVars.disk.pRead, pThis->iMaxFileSize)); + CHKiRet(strm.SetiMaxFileSize(pThis->tVars.disk.pReadDeq, pThis->iMaxFileSize)); + CHKiRet(strm.SetiMaxFileSize(pThis->tVars.disk.pReadDel, pThis->iMaxFileSize)); finalize_it: RETiRet; @@ -860,7 +921,8 @@ static rsRetVal qDestructDisk(qqueue_t *pThis) ASSERT(pThis != NULL); strm.Destruct(&pThis->tVars.disk.pWrite); - strm.Destruct(&pThis->tVars.disk.pRead); + strm.Destruct(&pThis->tVars.disk.pReadDeq); + strm.Destruct(&pThis->tVars.disk.pReadDel); RETiRet; } @@ -885,23 +947,37 @@ static rsRetVal qAddDisk(qqueue_t *pThis, void* pUsr) */ objDestruct(pUsr); - dbgoprint((obj_t*) pThis, "write wrote %lld octets to disk, queue disk size now %lld octets\n", + DBGOPRINT((obj_t*) pThis, "write wrote %lld octets to disk, queue disk size now %lld octets\n", nWriteCount, pThis->tVars.disk.sizeOnDisk); finalize_it: RETiRet; } -static rsRetVal qDelDisk(qqueue_t *pThis, void **ppUsr) + +static rsRetVal qDeqDisk(qqueue_t *pThis, void **ppUsr) { DEFiRet; + CHKiRet(obj.Deserialize(ppUsr, (uchar*) "msg", pThis->tVars.disk.pReadDeq, NULL, NULL)); + +finalize_it: + RETiRet; +} + + +static rsRetVal qDelDisk(qqueue_t *pThis) +{ + obj_t *pDummyObj; /* we need to deserialize it... */ + DEFiRet; + int64 offsIn; int64 offsOut; - CHKiRet(strm.GetCurrOffset(pThis->tVars.disk.pRead, &offsIn)); - CHKiRet(obj.Deserialize(ppUsr, (uchar*) "msg", pThis->tVars.disk.pRead, NULL, NULL)); - CHKiRet(strm.GetCurrOffset(pThis->tVars.disk.pRead, &offsOut)); + CHKiRet(strm.GetCurrOffset(pThis->tVars.disk.pReadDel, &offsIn)); + CHKiRet(obj.Deserialize(&pDummyObj, (uchar*) "msg", pThis->tVars.disk.pReadDel, NULL, NULL)); + objDestruct(pDummyObj); + CHKiRet(strm.GetCurrOffset(pThis->tVars.disk.pReadDel, &offsOut)); /* This time it is a bit tricky: we free disk space only upon file deletion. So we need * to keep track of what we have read until we get an out-offset that is lower than the @@ -913,7 +989,7 @@ static rsRetVal qDelDisk(qqueue_t *pThis, void **ppUsr) } else { pThis->tVars.disk.sizeOnDisk -= pThis->tVars.disk.bytesRead; pThis->tVars.disk.bytesRead = offsOut; - dbgoprint((obj_t*) pThis, "a file has been deleted, now %lld octets disk space used\n", pThis->tVars.disk.sizeOnDisk); + DBGOPRINT((obj_t*) pThis, "a file has been deleted, now %lld octets disk space used\n", pThis->tVars.disk.sizeOnDisk); /* awake possibly waiting enq process */ pthread_cond_signal(&pThis->notFull); /* we hold the mutex while we are in here! */ } @@ -922,6 +998,17 @@ finalize_it: RETiRet; } + +/* This is a dummy function for disks - we do not need to reset anything + * because everything is already persisted... + */ +static rsRetVal +qUnDeqAllDisk(__attribute__((unused)) qqueue_t *pThis) +{ + return RS_RET_OK; +} + + /* -------------------- direct (no queueing) -------------------- */ static rsRetVal qConstructDirect(qqueue_t __attribute__((unused)) *pThis) { @@ -936,6 +1023,8 @@ static rsRetVal qDestructDirect(qqueue_t __attribute__((unused)) *pThis) static rsRetVal qAddDirect(qqueue_t *pThis, void* pUsr) { + batch_t singleBatch; + batch_obj_t batchObj; DEFiRet; ASSERT(pThis != NULL); @@ -945,70 +1034,33 @@ static rsRetVal qAddDirect(qqueue_t *pThis, void* pUsr) * mode the consumer probably has a lot to convey (which get's lost in the other modes * because they are asynchronous. But direct mode is deliberately synchronous. 
* rgerhards, 2008-02-12 + * We use our knowledge about the batch_t structure below, but without that, we + * pay a too-large performance toll... -- rgerhards, 2009-04-22 */ - iRet = pThis->pConsumer(pThis->pUsr, pUsr); + batchObj.state = BATCH_STATE_RDY; + batchObj.pUsrp = (obj_t*) pUsr; + singleBatch.nElem = 1; /* there always is only one in direct mode */ + singleBatch.pElem = &batchObj; + iRet = pThis->pConsumer(pThis->pUsr, &singleBatch); + objDestruct(pUsr); RETiRet; } -static rsRetVal qDelDirect(qqueue_t __attribute__((unused)) *pThis, __attribute__((unused)) void **out) + +static rsRetVal qDelDirect(qqueue_t __attribute__((unused)) *pThis) { return RS_RET_OK; } - -/* --------------- end type-specific handlers -------------------- */ - - -/* unget a user pointer that has been dequeued. This functionality is especially important - * for consumer cancel cleanup handlers. To support it, a short list of ungotten user pointers - * is maintened in memory. - * rgerhards, 2008-01-20 - */ static rsRetVal -qqueueUngetObj(qqueue_t *pThis, obj_t *pUsr, int bLockMutex) +qUnDeqAllDirect(__attribute__((unused)) qqueue_t *pThis) { - DEFiRet; - DEFVARS_mutexProtection; - - ISOBJ_TYPE_assert(pThis, qqueue); - ISOBJ_assert(pUsr); /* TODO: we aborted right at this place at least 3 times -- race? 2008-02-28, -03-10, -03-15 - The second time I noticed it the queue was in destruction with NO worker threads - running. The pUsr ptr was totally off and provided no clue what it may be pointing - at (except that it looked like the static data pool). Both times, the abort happend - inside an action queue */ - - dbgoprint((obj_t*) pThis, "ungetting user object %s\n", obj.GetName(pUsr)); - BEGIN_MTX_PROTECTED_OPERATIONS(pThis->mut, bLockMutex); - iRet = qqueueAddLinkedList(&pThis->pUngetRoot, &pThis->pUngetLast, pUsr); - ++pThis->iUngottenObjs; /* indicate one more */ - END_MTX_PROTECTED_OPERATIONS(pThis->mut); - - RETiRet; + return RS_RET_OK; } -/* dequeues a user pointer from the ungotten queue. Pointers from there should always be - * dequeued first. - * - * This function must only be called when the mutex is locked! - * - * rgerhards, 2008-01-29 - */ -static rsRetVal -qqueueGetUngottenObj(qqueue_t *pThis, obj_t **ppUsr) -{ - DEFiRet; - - ISOBJ_TYPE_assert(pThis, qqueue); - ASSERT(ppUsr != NULL); - - iRet = qqueueDelLinkedList(&pThis->pUngetRoot, &pThis->pUngetLast, ppUsr); - --pThis->iUngottenObjs; /* indicate one less */ - dbgoprint((obj_t*) pThis, "dequeued ungotten user object %s\n", obj.GetName(*ppUsr)); - - RETiRet; -} +/* --------------- end type-specific handlers -------------------- */ /* generic code to add a queue entry @@ -1027,7 +1079,8 @@ qqueueAdd(qqueue_t *pThis, void *pUsr) if(pThis->qType != QUEUETYPE_DIRECT) { ATOMIC_INC(pThis->iQueueSize); - dbgoprint((obj_t*) pThis, "entry added, size now %d entries\n", pThis->iQueueSize); + DBGOPRINT((obj_t*) pThis, "entry added, size now log %d, phys %d entries\n", + getLogicalQueueSize(pThis), getPhysicalQueueSize(pThis)); } finalize_it: @@ -1035,12 +1088,10 @@ finalize_it: } -/* generic code to remove a queue entry - * rgerhards, 2008-01-29: we must first see if there is any object in the - * ungotten list and, if so, dequeue it first. +/* generic code to dequeue a queue entry */ static rsRetVal -qqueueDel(qqueue_t *pThis, void *pUsr) +qqueueDeq(qqueue_t *pThis, void **ppUsr) { DEFiRet; @@ -1051,53 +1102,36 @@ qqueueDel(qqueue_t *pThis, void *pUsr) * If we decrement, however, we may lose a message. 
But that is better than * losing the whole process because it loops... -- rgerhards, 2008-01-03 */ - if(pThis->iUngottenObjs > 0) { - iRet = qqueueGetUngottenObj(pThis, (obj_t**) pUsr); - } else { - iRet = pThis->qDel(pThis, pUsr); - ATOMIC_DEC(pThis->iQueueSize); - } + iRet = pThis->qDeq(pThis, ppUsr); + ATOMIC_INC(pThis->nLogDeq); - dbgoprint((obj_t*) pThis, "entry deleted, state %d, size now %d entries\n", - iRet, pThis->iQueueSize); +// DBGOPRINT((obj_t*) pThis, "entry deleted, size now log %d, phys %d entries\n", +// getLogicalQueueSize(pThis), getPhysicalQueueSize(pThis)); RETiRet; } -/* This function shuts down all worker threads and waits until they - * have terminated. If they timeout, they are cancelled. Parameters have been set - * before this function is called so that DA queues will be fully persisted to - * disk (if configured to do so). - * rgerhards, 2008-01-24 - * Please note that this function shuts down BOTH the parent AND the child queue - * in DA case. This is necessary because their timeouts are tightly coupled. Most - * importantly, the timeouts would be applied twice (or logic be extremely - * complex) if each would have its own shutdown. The function does not self check - * this condition - the caller must make sure it is not called with a parent. +/* Try to terminate queue worker threads within the regular shutdown interval. + * Both the regular and DA queue (if it exists) is waited for, but on the same timeout. + * After this function returns, the workers must either be finished or some force + * to finish them must be applied. + * This function also instructs the DA worker pool (if it exists) to terminate. This is done + * in preparation of final queue shutdown. + * rgerhards, 2009-05-27 */ -static rsRetVal qqueueShutdownWorkers(qqueue_t *pThis) +static rsRetVal +tryShutdownWorkersWithinQueueTimeout(qqueue_t *pThis) { - DEFiRet; - DEFVARS_mutexProtection; struct timespec tTimeout; rsRetVal iRetLocal; + DEFiRet; ISOBJ_TYPE_assert(pThis, qqueue); ASSERT(pThis->pqParent == NULL); /* detect invalid calling sequence */ - dbgoprint((obj_t*) pThis, "initiating worker thread shutdown sequence\n"); - - /* we reduce the low water mark in any case. This is not absolutely necessary, but - * it is useful because we enable DA mode at several spots below and so we do not need - * to think about the low water mark each time. - */ - pThis->iHighWtrMrk = 1; /* if we do not do this, the DA queue will not stop! */ - pThis->iLowWtrMrk = 0; - - /* first try to shutdown the queue within the regular shutdown period */ - BEGIN_MTX_PROTECTED_OPERATIONS(pThis->mut, LOCK_MUTEX); /* some workers may be running in parallel! */ - if(qqueueGetOverallQueueSize(pThis) > 0) { + d_pthread_mutex_lock(pThis->mut); /* some workers may be running in parallel! */ + if(getPhysicalQueueSize(pThis) > 0) { if(pThis->bRunsDA) { /* We may have waited on the low water mark. As it may have changed, we * see if we reactivate the worker. @@ -1105,7 +1139,7 @@ static rsRetVal qqueueShutdownWorkers(qqueue_t *pThis) wtpAdviseMaxWorkers(pThis->pWtpDA, 1); } } - END_MTX_PROTECTED_OPERATIONS(pThis->mut); + d_pthread_mutex_unlock(pThis->mut); /* Now wait for the queue's workers to shut down. Note that we run into the code even if we just found * out there are no active workers - that doesn't matter: the wtp knows about that and so will @@ -1124,151 +1158,212 @@ static rsRetVal qqueueShutdownWorkers(qqueue_t *pThis) * shutdown of both the regular and DA queue on *the same* timeout. 
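/* Side note with a sketch (not from the patch): both wtpShutdownAll() calls can
 * honor "the same" timeout because tTimeout is an absolute deadline, computed once
 * by timeoutComp() and then handed to every wait. A helper of that kind can be
 * pictured as below; the millisecond interpretation is an assumption based on the
 * timeoutComp(&tTimeout, 100) / "fixed 100ms timeout" usage further down in this
 * patch, not a statement about the actual srUtils implementation.
 */
#include <time.h>

static void sketchTimeoutComp(struct timespec *pt, long iTimeoutMs)
{
	clock_gettime(CLOCK_REALTIME, pt);		/* "now"... */
	pt->tv_sec  += iTimeoutMs / 1000;		/* ...plus the relative timeout */
	pt->tv_nsec += (iTimeoutMs % 1000) * 1000000L;
	if(pt->tv_nsec >= 1000000000L) {		/* normalize nanoseconds */
		pt->tv_nsec -= 1000000000L;
		pt->tv_sec  += 1;
	}
}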
*/ timeoutComp(&tTimeout, pThis->toQShutdown); - dbgoprint((obj_t*) pThis, "trying shutdown of regular workers\n"); + DBGOPRINT((obj_t*) pThis, "trying shutdown of regular workers\n"); iRetLocal = wtpShutdownAll(pThis->pWtpReg, wtpState_SHUTDOWN, &tTimeout); if(iRetLocal == RS_RET_TIMED_OUT) { - dbgoprint((obj_t*) pThis, "regular shutdown timed out on primary queue (this is OK)\n"); + DBGOPRINT((obj_t*) pThis, "regular shutdown timed out on primary queue (this is OK)\n"); } else { - /* OK, the regular queue is now shut down. So we can now wait for the DA queue (if running DA) */ - dbgoprint((obj_t*) pThis, "regular queue workers shut down.\n"); - BEGIN_MTX_PROTECTED_OPERATIONS(pThis->mut, LOCK_MUTEX); /* some workers may be running in parallel! */ - if(pThis->bRunsDA) { - END_MTX_PROTECTED_OPERATIONS(pThis->mut); - dbgoprint((obj_t*) pThis, "we have a DA queue (0x%lx), requesting its shutdown.\n", - qqueueGetID(pThis->pqDA)); - /* we use the same absolute timeout as above, so we do not use more than the configured - * timeout interval! - */ - dbgoprint((obj_t*) pThis, "trying shutdown of DA workers\n"); - iRetLocal = wtpShutdownAll(pThis->pWtpDA, wtpState_SHUTDOWN, &tTimeout); - if(iRetLocal == RS_RET_TIMED_OUT) { - dbgoprint((obj_t*) pThis, "shutdown timed out on DA queue (this is OK)\n"); - } - } else { - END_MTX_PROTECTED_OPERATIONS(pThis->mut); - } + DBGOPRINT((obj_t*) pThis, "regular queue workers shut down.\n"); } - /* when we reach this point, both queues are either empty or the regular queue shutdown timeout - * has expired. Now we need to check if we are configured to not loose messages. If so, we need - * to persist the queue to disk (this is only possible if the queue is DA-enabled). We must also - * set the primary queue to SHUTDOWN_IMMEDIATE, as it shall now terminate as soon as its consumer - * is done. This is especially important as we otherwise may interfere with queue order while the - * DA consumer is running. -- rgerhards, 2008-01-27 - * Note: there was a note that we should not wait eternally on the DA worker if we run in - * enqueue-only note. I have reviewed the code and think there is no need for this check. Howerver, - * I'd like to keep this note in here should we happen to run into some related trouble. - * rgerhards, 2008-01-28 - */ - wtpSetState(pThis->pWtpReg, wtpState_SHUTDOWN_IMMEDIATE); /* set primary queue to shutdown only */ - - /* at this stage, we need to have the DA worker properly initialized and running (if there is one) */ - if(pThis->bRunsDA) - qqueueWaitDAModeInitialized(pThis); - - BEGIN_MTX_PROTECTED_OPERATIONS(pThis->mut, LOCK_MUTEX); /* some workers may be running in parallel! */ - /* optimize parameters for shutdown of DA-enabled queues */ - if(pThis->bIsDA && qqueueGetOverallQueueSize(pThis) > 0 && pThis->bSaveOnShutdown) { - /* switch to enqueue-only mode so that no more actions happen */ - if(pThis->bRunsDA == 0) { - qqueueInitDA(pThis, QUEUE_MODE_ENQONLY, MUTEX_ALREADY_LOCKED); /* switch to DA mode */ + /* OK, the worker for the regular queue is processed, on the the DA queue regular worker. */ + if(pThis->pqDA != NULL) { + DBGOPRINT((obj_t*) pThis, "we have a DA queue (0x%lx), requesting its shutdown.\n", + qqueueGetID(pThis->pqDA)); + /* we use the same absolute timeout as above, so we do not use more than the configured + * timeout interval! 
+ */ + DBGOPRINT((obj_t*) pThis, "trying shutdown of regular worker of DA queue\n"); + iRetLocal = wtpShutdownAll(pThis->pqDA->pWtpReg, wtpState_SHUTDOWN, &tTimeout); + if(iRetLocal == RS_RET_TIMED_OUT) { + DBGOPRINT((obj_t*) pThis, "shutdown timed out on DA queue worker (this is OK)\n"); } else { - /* TODO: RACE: we may reach this point when the DA worker has been initialized (state 1) - * but is not yet running (state 2). In this case, pThis->pqDA is NULL! rgerhards, 2008-02-27 - */ - qqueueSetEnqOnly(pThis->pqDA, QUEUE_MODE_ENQONLY, MUTEX_ALREADY_LOCKED); /* switch to enqueue-only mode */ + DBGOPRINT((obj_t*) pThis, "DA queue worker shut down.\n"); } - END_MTX_PROTECTED_OPERATIONS(pThis->mut); - /* make sure we do not timeout before we are done */ - dbgoprint((obj_t*) pThis, "bSaveOnShutdown configured, eternal timeout set\n"); - timeoutComp(&tTimeout, QUEUE_TIMEOUT_ETERNAL); - /* and run the primary queue's DA worker to drain the queue */ - iRetLocal = wtpShutdownAll(pThis->pWtpDA, wtpState_SHUTDOWN, &tTimeout); - if(iRetLocal != RS_RET_OK) { - dbgoprint((obj_t*) pThis, "unexpected iRet state %d after trying to shut down primary queue in disk save mode, " - "continuing, but results are unpredictable\n", iRetLocal); + /* we also instruct the DA worker pool to shutdown ASAP. If we need it for persisting + * the queue, it is restarted at a later stage. We don't care here if a timeout happens. + */ + DBGOPRINT((obj_t*) pThis, "trying shutdown of main queue DA worker pool\n"); + iRetLocal = wtpShutdownAll(pThis->pWtpDA, wtpState_SHUTDOWN_IMMEDIATE, &tTimeout); + if(iRetLocal == RS_RET_TIMED_OUT) { + DBGOPRINT((obj_t*) pThis, "shutdown timed out on main queue DA worker pool (this is OK)\n"); + } else { + DBGOPRINT((obj_t*) pThis, "main queue DA worker pool shut down on first try.\n"); } - } else { - END_MTX_PROTECTED_OPERATIONS(pThis->mut); } - /* now the primary queue is either empty, persisted to disk - or set to loose messages. So we - * can now request immediate shutdown of any remaining workers. Note that if bSaveOnShutdown was set, - * the queue is now empty. If regular workers are still running, and try to pull the next message, - * they will automatically terminate as there no longer is any message left to process. + RETiRet; +} + + +/* Try to shut down regular and DA queue workers, within the action timeout + * period. Note that the main queue DA worker is still unaffected (and may shuffle + * data to the disk queue while we terminate the other workers). Not finishing + * processing all messages is now OK (but they may be preserved later, depending + * on bSaveOnShutdown setting). + * rgerhards, 2009-05-27 + */ +static rsRetVal +tryShutdownWorkersWithinActionTimeout(qqueue_t *pThis) +{ + struct timespec tTimeout; + rsRetVal iRetLocal; + DEFiRet; + + ISOBJ_TYPE_assert(pThis, qqueue); + ASSERT(pThis->pqParent == NULL); /* detect invalid calling sequence */ + + /* instruct workers to finish ASAP, even if still work exists */ + /* note that we modify bEnqOnly directly, because going through the method would + * startup some workers again. So this is OK here. -- rgerhards, 2009-05-28 */ - BEGIN_MTX_PROTECTED_OPERATIONS(pThis->mut, LOCK_MUTEX); /* some workers may be running in parallel! 
*/ - if(qqueueGetOverallQueueSize(pThis) > 0) { - timeoutComp(&tTimeout, pThis->toActShutdown); - if(wtpGetCurNumWrkr(pThis->pWtpReg, LOCK_MUTEX) > 0) { - END_MTX_PROTECTED_OPERATIONS(pThis->mut); - dbgoprint((obj_t*) pThis, "trying immediate shutdown of regular workers\n"); - iRetLocal = wtpShutdownAll(pThis->pWtpReg, wtpState_SHUTDOWN_IMMEDIATE, &tTimeout); - if(iRetLocal == RS_RET_TIMED_OUT) { - dbgoprint((obj_t*) pThis, "immediate shutdown timed out on primary queue (this is acceptable and " - "triggers cancellation)\n"); - } else if(iRetLocal != RS_RET_OK) { - dbgoprint((obj_t*) pThis, "unexpected iRet state %d after trying immediate shutdown of the primary queue " - "in disk save mode. Continuing, but results are unpredictable\n", iRetLocal); - } - /* we need to re-aquire the mutex for the next check in this case! */ - BEGIN_MTX_PROTECTED_OPERATIONS(pThis->mut, LOCK_MUTEX); /* some workers may be running in parallel! */ + pThis->bEnqOnly = 1; + /* need to set this so that the DA queue begins shutdown in parallel! */ + if(pThis->pqDA != NULL) { + pThis->pqDA->bEnqOnly = 1; + wtpSetState(pThis->pqDA->pWtpReg, wtpState_SHUTDOWN_IMMEDIATE); + } + + /* now give the queue workers a last chance to gracefully shut down (based on action timeout setting) */ + timeoutComp(&tTimeout, pThis->toActShutdown); + DBGOPRINT((obj_t*) pThis, "trying immediate shutdown of regular workers (if any)\n"); + iRetLocal = wtpShutdownAll(pThis->pWtpReg, wtpState_SHUTDOWN_IMMEDIATE, &tTimeout); + if(iRetLocal == RS_RET_TIMED_OUT) { + DBGOPRINT((obj_t*) pThis, "immediate shutdown timed out on primary queue (this is acceptable and " + "triggers cancellation)\n"); + } else if(iRetLocal != RS_RET_OK) { + DBGOPRINT((obj_t*) pThis, "unexpected iRet state %d after trying immediate shutdown of the primary queue " + "in disk save mode. Continuing, but results are unpredictable\n", iRetLocal); + } + + if(pThis->pqDA != NULL) { + /* and now the same for the DA queue */ + DBGOPRINT((obj_t*) pThis, "trying immediate shutdown of DA queue workers\n"); + iRetLocal = wtpShutdownAll(pThis->pqDA->pWtpReg, wtpState_SHUTDOWN_IMMEDIATE, &tTimeout); + if(iRetLocal == RS_RET_TIMED_OUT) { + DBGOPRINT((obj_t*) pThis, "immediate shutdown timed out on DA queue (this is acceptable " + "and triggers cancellation)\n"); + } else if(iRetLocal != RS_RET_OK) { + DBGOPRINT((obj_t*) pThis, "unexpected iRet state %d after trying immediate shutdown of the DA " + "queue in disk save mode. Continuing, but results are unpredictable\n", iRetLocal); } - if(pThis->bIsDA && wtpGetCurNumWrkr(pThis->pWtpDA, LOCK_MUTEX) > 0) { - /* and now the same for the DA queue */ - END_MTX_PROTECTED_OPERATIONS(pThis->mut); - dbgoprint((obj_t*) pThis, "trying immediate shutdown of DA workers\n"); - iRetLocal = wtpShutdownAll(pThis->pWtpDA, wtpState_SHUTDOWN_IMMEDIATE, &tTimeout); - if(iRetLocal == RS_RET_TIMED_OUT) { - dbgoprint((obj_t*) pThis, "immediate shutdown timed out on DA queue (this is acceptable and " - "triggers cancellation)\n"); - } else if(iRetLocal != RS_RET_OK) { - dbgoprint((obj_t*) pThis, "unexpected iRet state %d after trying immediate shutdown of the DA queue " - "in disk save mode. Continuing, but results are unpredictable\n", iRetLocal); - } + /* and now we need to check the DA worker itself (the one that shuffles data to the disk). This + * is necessary because we may be in a situation where the DA queue regular worker and the + * main queue worker stopped rather quickly. In this case, there is almost no time (and + * probably no thread switch!) 
between the point where we instructed the main queue DA + * worker to shut down and this code location. In consequence, it may not even have + * noticed that it should shut down, much less actually done so. So we provide it with a + * fixed 100ms timeout to try to complete its work, which usually should be sufficient. + * rgerhards, 2009-10-06 + */ + timeoutComp(&tTimeout, 100); + DBGOPRINT((obj_t*) pThis, "last try for regular shutdown of main queue DA worker pool\n"); + iRetLocal = wtpShutdownAll(pThis->pWtpDA, wtpState_SHUTDOWN_IMMEDIATE, &tTimeout); + if(iRetLocal == RS_RET_TIMED_OUT) { + DBGOPRINT((obj_t*) pThis, "shutdown timed out on main queue DA worker pool " + "(this is not good, but probably OK)\n"); } else { - END_MTX_PROTECTED_OPERATIONS(pThis->mut); + DBGOPRINT((obj_t*) pThis, "main queue DA worker pool shut down.\n"); } - } else { - END_MTX_PROTECTED_OPERATIONS(pThis->mut); } + RETiRet; +} + + +/* This function cancels all remaining regular workers for both the main and the DA + * queue. The main queue's DA worker pool continues to run (if it exists and is active). + * rgerhards, 2009-05-29 + */ +static rsRetVal +cancelWorkers(qqueue_t *pThis) +{ + rsRetVal iRetLocal; + DEFiRet; + /* Now queue workers should have terminated. If not, we need to cancel them as we have applied * all timeout setting. If any worker in any queue still executes, its consumer is possibly - * long-running and cancelling is the only way to get rid of it. Note that the - * cancellation handler will probably re-queue a user pointer, so the queue's enqueue - * function is still needed (what is no problem as we do not yet destroy the queue - but I - * thought it's a good idea to mention that fact). -- rgerhards, 2008-01-25 + * long-running and cancelling is the only way to get rid of it. */ - dbgoprint((obj_t*) pThis, "checking to see if we need to cancel any worker threads of the primary queue\n"); + DBGOPRINT((obj_t*) pThis, "checking to see if we need to cancel any worker threads of the primary queue\n"); iRetLocal = wtpCancelAll(pThis->pWtpReg); /* returns immediately if all threads already have terminated */ if(iRetLocal != RS_RET_OK) { - dbgoprint((obj_t*) pThis, "unexpected iRet state %d trying to cancel primary queue worker " + DBGOPRINT((obj_t*) pThis, "unexpected iRet state %d trying to cancel primary queue worker " "threads, continuing, but results are unpredictable\n", iRetLocal); } - - /* TODO: think: do we really need to do this here? Can't it happen on DA queue destruction? If we - * disable it, we get an assertion... I think this is OK, as we need to have a certain order and - * canceling the DA workers here ensures that order. But in any instant, we may have a look at this - * code after we have reaced the milestone. -- rgerhards, 2008-01-27 - */ /* ... 
and now the DA queue, if it exists (should always be after the primary one) */ if(pThis->pqDA != NULL) { - dbgoprint((obj_t*) pThis, "checking to see if we need to cancel any worker threads of the DA queue\n"); + DBGOPRINT((obj_t*) pThis, "checking to see if we need to cancel any worker threads of the DA queue\n"); iRetLocal = wtpCancelAll(pThis->pqDA->pWtpReg); /* returns immediately if all threads already have terminated */ if(iRetLocal != RS_RET_OK) { - dbgoprint((obj_t*) pThis, "unexpected iRet state %d trying to cancel DA queue worker " + DBGOPRINT((obj_t*) pThis, "unexpected iRet state %d trying to cancel DA queue worker " "threads, continuing, but results are unpredictable\n", iRetLocal); } + + /* finally, we cancel the main queue's DA worker pool, if it still is running. It may be + * restarted later to persist the queue. But we stop it, because otherwise we get into + * big trouble when resetting the logical dequeue pointer. This operation can only be + * done when *no* worker is running. So time for a shutdown... -- rgerhards, 2009-05-28 + */ + DBGOPRINT((obj_t*) pThis, "checking to see if we need to cancel the main queue's DA worker pool\n"); + iRetLocal = wtpCancelAll(pThis->pWtpDA); /* returns immediately if all threads already have terminated */ } + RETiRet; +} + + +/* This function shuts down all worker threads and waits until they + * have terminated. If they timeout, they are cancelled. + * rgerhards, 2008-01-24 + * Please note that this function shuts down BOTH the parent AND the child queue + * in DA case. This is necessary because their timeouts are tightly coupled. Most + * importantly, the timeouts would be applied twice (or logic be extremely + * complex) if each would have its own shutdown. The function does not self check + * this condition - the caller must make sure it is not called with a parent. + * rgerhards, 2009-05-26: we do NO longer persist the queue here if bSaveOnShutdown + * is set. This must be handled by the caller. Not doing that cleans up the queue + * shutdown considerably. Also, older engines had a potential hang condition when + * the DA queue was already started and the DA worker configured for infinite + * retries and the action was during retry processing. This was a design issue, + * which is solved as of now. Note that the shutdown now may take a little bit + * longer, because we no longer can persist the queue in parallel to waiting + * on worker timeouts. + */ +static rsRetVal +ShutdownWorkers(qqueue_t *pThis) +{ + DEFiRet; + + ISOBJ_TYPE_assert(pThis, qqueue); + ASSERT(pThis->pqParent == NULL); /* detect invalid calling sequence */ + + DBGOPRINT((obj_t*) pThis, "initiating worker thread shutdown sequence\n"); + + /* we reduce the low water mark in any case. This is not absolutely necessary, but + * it is useful because we enable DA mode at several spots below and so we do not need + * to think about the low water mark each time. + */ + pThis->iHighWtrMrk = 1; /* if we do not do this, the DA queue will not stop! */ + pThis->iLowWtrMrk = 0; + + CHKiRet(tryShutdownWorkersWithinQueueTimeout(pThis)); + + if(getPhysicalQueueSize(pThis) > 0) { + CHKiRet(tryShutdownWorkersWithinActionTimeout(pThis)); + } + + CHKiRet(cancelWorkers(pThis)); + /* ... finally ... all worker threads have terminated :-) * Well, more precisely, they *are in termination*. Some cancel cleanup handlers - * may still be running. + * may still be running. Note that the main queue's DA worker may still be running. 
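/* Generic illustration (an assumption about the underlying mechanism, not wtp's
 * actual code): waiting for a worker count to reach zero against an absolute
 * deadline. Because the deadline is absolute, the successive shutdown phases
 * above share one time budget, and a phase that only starts after the deadline
 * has passed times out right away (which is why a timed-out result is treated as
 * acceptable above).
 */
#include <pthread.h>
#include <errno.h>
#include <time.h>

typedef struct {
	pthread_mutex_t mut;
	pthread_cond_t  cndAllDone;
	int             nActiveWorkers;
} wrkpool_sketch_t;

/* returns 0 when all workers finished in time, ETIMEDOUT otherwise */
static int waitAllDone(wrkpool_sketch_t *p, const struct timespec *pDeadline)
{
	int ret = 0;
	int done;

	pthread_mutex_lock(&p->mut);
	while(p->nActiveWorkers > 0 && ret != ETIMEDOUT)
		ret = pthread_cond_timedwait(&p->cndAllDone, &p->mut, pDeadline);
	done = (p->nActiveWorkers == 0);
	pthread_mutex_unlock(&p->mut);
	return done ? 0 : ETIMEDOUT;
}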
*/ - dbgoprint((obj_t*) pThis, "worker threads terminated, remaining queue size %d.\n", qqueueGetOverallQueueSize(pThis)); + DBGOPRINT((obj_t*) pThis, "worker threads terminated, remaining queue size log %d, phys %d.\n", + getLogicalQueueSize(pThis), getPhysicalQueueSize(pThis)); +finalize_it: RETiRet; } @@ -1280,7 +1375,7 @@ static rsRetVal qqueueShutdownWorkers(qqueue_t *pThis) * to modify some parameters before the queue is actually started. */ rsRetVal qqueueConstruct(qqueue_t **ppThis, queueType_t qType, int iWorkerThreads, - int iMaxQueueSize, rsRetVal (*pConsumer)(void*,void*)) + int iMaxQueueSize, rsRetVal (*pConsumer)(void*, batch_t*)) { DEFiRet; qqueue_t *pThis; @@ -1305,10 +1400,12 @@ rsRetVal qqueueConstruct(qqueue_t **ppThis, queueType_t qType, int iWorkerThread pThis->lenSpoolDir = strlen((char*)pThis->pszSpoolDir); pThis->iMaxFileSize = 1024 * 1024; /* default is 1 MiB */ pThis->iQueueSize = 0; + pThis->nLogDeq = 0; pThis->iMaxQueueSize = iMaxQueueSize; pThis->pConsumer = pConsumer; pThis->iNumWorkerThreads = iWorkerThreads; pThis->iDeqtWinToHr = 25; /* disable time-windowed dequeuing by default */ + pThis->iDeqBatchSize = 8; /* conservative default, should still provide good performance */ pThis->pszFilePrefix = NULL; pThis->qType = qType; @@ -1319,19 +1416,25 @@ rsRetVal qqueueConstruct(qqueue_t **ppThis, queueType_t qType, int iWorkerThread pThis->qConstruct = qConstructFixedArray; pThis->qDestruct = qDestructFixedArray; pThis->qAdd = qAddFixedArray; + pThis->qDeq = qDeqFixedArray; pThis->qDel = qDelFixedArray; + pThis->qUnDeqAll = qUnDeqAllFixedArray; break; case QUEUETYPE_LINKEDLIST: pThis->qConstruct = qConstructLinkedList; pThis->qDestruct = qDestructLinkedList; pThis->qAdd = qAddLinkedList; - pThis->qDel = (rsRetVal (*)(qqueue_t*,void**)) qDelLinkedList; + pThis->qDeq = (rsRetVal (*)(qqueue_t*,void**)) qDeqLinkedList; + pThis->qDel = (rsRetVal (*)(qqueue_t*)) qDelLinkedList; + pThis->qUnDeqAll = qUnDeqAllLinkedList; break; case QUEUETYPE_DISK: pThis->qConstruct = qConstructDisk; pThis->qDestruct = qDestructDisk; pThis->qAdd = qAddDisk; + pThis->qDeq = qDeqDisk; pThis->qDel = qDelDisk; + pThis->qUnDeqAll = qUnDeqAllDisk; /* special handling */ pThis->iNumWorkerThreads = 1; /* we need exactly one worker */ break; @@ -1340,6 +1443,7 @@ rsRetVal qqueueConstruct(qqueue_t **ppThis, queueType_t qType, int iWorkerThread pThis->qDestruct = qDestructDirect; pThis->qAdd = qAddDirect; pThis->qDel = qDelDirect; + pThis->qUnDeqAll = qUnDeqAllDirect; break; } @@ -1349,36 +1453,6 @@ finalize_it: } -/* cancellation cleanup handler for queueWorker () - * Updates admin structure and frees ressources. - * Params: - * arg1 - user pointer (in this case a qqueue_t) - * arg2 - user data pointer (in this case a queue data element, any object [queue's pUsr ptr!]) - * Note that arg2 may be NULL, in which case no dequeued but unprocessed pUsr exists! - * rgerhards, 2008-01-16 - */ -static rsRetVal -qqueueConsumerCancelCleanup(void *arg1, void *arg2) -{ - DEFiRet; - - qqueue_t *pThis = (qqueue_t*) arg1; - obj_t *pUsr = (obj_t*) arg2; - - ISOBJ_TYPE_assert(pThis, qqueue); - - if(pUsr != NULL) { - /* make sure the data element is not lost */ - dbgoprint((obj_t*) pThis, "cancelation cleanup handler consumer called, we need to unget one user data element\n"); - CHKiRet(qqueueUngetObj(pThis, pUsr, LOCK_MUTEX)); - } - -finalize_it: - RETiRet; -} - - - /* This function checks if the provided message shall be discarded and does so, if needed. 
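/* Hedged sketch of a consumer callback under the new batch-oriented interface
 * that qqueueConstruct() above now expects (rsRetVal (*)(void*, batch_t*)): the
 * callback receives a batch_t and walks pElem[0..nElem-1]. The function name and
 * the per-message helper are invented for illustration; the field names (nElem,
 * pElem, pUsrp) are the ones visible in this patch.
 */
static rsRetVal processOneMsg(void *pMyData, void *pUsr);	/* hypothetical worker */

static rsRetVal
exampleBatchConsumer(void *pMyData, batch_t *pBatch)
{
	int i;
	DEFiRet;

	for(i = 0 ; i < pBatch->nElem ; ++i) {
		/* hand each ready element to the (hypothetical) per-message handler;
		 * destruction of the elements is left to the queue machinery.
		 */
		CHKiRet(processOneMsg(pMyData, pBatch->pElem[i].pUsrp));
	}

finalize_it:
	RETiRet;
}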
* In DA mode, we do not discard any messages as we assume the disk subsystem is fast enough to * provide real-time creation of spool files. @@ -1404,12 +1478,12 @@ static int qqueueChkDiscardMsg(qqueue_t *pThis, int iQueueSize, int bRunsDA, voi if(pThis->iDiscardMrk > 0 && iQueueSize >= pThis->iDiscardMrk && bRunsDA == 0) { iRetLocal = objGetSeverity(pUsr, &iSeverity); if(iRetLocal == RS_RET_OK && iSeverity >= pThis->iDiscardSeverity) { - dbgoprint((obj_t*) pThis, "queue nearly full (%d entries), discarded severity %d message\n", + DBGOPRINT((obj_t*) pThis, "queue nearly full (%d entries), discarded severity %d message\n", iQueueSize, iSeverity); objDestruct(pUsr); ABORT_FINALIZE(RS_RET_QUEUE_FULL); } else { - dbgoprint((obj_t*) pThis, "queue nearly full (%d entries), but could not drop msg " + DBGOPRINT((obj_t*) pThis, "queue nearly full (%d entries), but could not drop msg " "(iRet: %d, severity %d)\n", iQueueSize, iRetLocal, iSeverity); } } @@ -1419,38 +1493,171 @@ finalize_it: } -/* dequeue the queued object for the queue consumers. - * rgerhards, 2008-10-21 +/* Finally remove n elements from the queue store. */ -static rsRetVal -qqueueDequeueConsumable(qqueue_t *pThis, wti_t *pWti, int iCancelStateSave) +static inline rsRetVal +DoDeleteBatchFromQStore(qqueue_t *pThis, int nElem) { + int i; DEFiRet; + + ISOBJ_TYPE_assert(pThis, qqueue); + + /* now send delete request to storage driver */ + for(i = 0 ; i < nElem ; ++i) { + pThis->qDel(pThis); + } + + /* iQueueSize is not decremented by qDel(), so we need to do it ourselves */ + ATOMIC_SUB(pThis->iQueueSize, nElem); + ATOMIC_SUB(pThis->nLogDeq, nElem); +dbgprintf("delete batch from store, new sizes: log %d, phys %d\n", getLogicalQueueSize(pThis), getPhysicalQueueSize(pThis)); + ++pThis->deqIDDel; /* one more batch dequeued */ + + RETiRet; +} + + +/* remove messages from the physical queue store that are fully processed. This is + * controlled via the to-delete list. We can only delete those elements, that are + * at the current physical tail of the queue. If the batch is from another position, + * we schedule it for deletion, but actual deletion will happen at a later call + * of this function here. We always delete as much as possible, which includes + * picking up things from the to-delete list. + */ +static inline rsRetVal +DeleteBatchFromQStore(qqueue_t *pThis, batch_t *pBatch) +{ + toDeleteLst_t *pTdl; + qDeqID deqIDDel; + DEFiRet; + + ISOBJ_TYPE_assert(pThis, qqueue); + assert(pBatch != NULL); + + pTdl = tdlPeek(pThis); /* get current head element */ + if(pTdl == NULL) { /* to-delete list empty */ + DoDeleteBatchFromQStore(pThis, pBatch->nElemDeq); + } else if(pBatch->deqID == pThis->deqIDDel) { + deqIDDel = pThis->deqIDDel; + pTdl = tdlPeek(pThis); + while(pTdl != NULL && deqIDDel == pTdl->deqID) { + DoDeleteBatchFromQStore(pThis, pTdl->nElemDeq); + tdlPop(pThis); + ++deqIDDel; + pTdl = tdlPeek(pThis); + } + } else { + /* can not delete, insert into to-delete list */ + dbgprintf("not at head of to-delete list, enqueue %d\n", (int) pBatch->deqID); + CHKiRet(tdlAdd(pThis, pBatch->deqID, pBatch->nElemDeq)); + } + +finalize_it: + RETiRet; +} + + +/* Delete a batch of processed user objects from the queue, which includes + * destructing the objects themself. 
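/* Worked example (illustrative only) of the ordering rule DeleteBatchFromQStore()
 * above enforces, simulated with plain integers: a batch may only be removed from
 * the queue store when its deqID equals the next expected delete ID (deqIDDel);
 * batches that complete "too early" are parked on the to-delete list until the
 * gap closes. The real code keeps an ordered list and only inspects its head via
 * tdlPeek()/tdlPop(); the tiny rescan below is a simplification.
 */
#include <stdio.h>

int main(void)
{
	unsigned deqIDDel = 10;				/* next batch that may be deleted */
	unsigned parked[4];
	unsigned finishOrder[] = { 11, 12, 10 };	/* completion order of batches */
	int nParked = 0;
	int i, j;

	for(i = 0 ; i < 3 ; ++i) {
		unsigned id = finishOrder[i];
		if(id != deqIDDel) {			/* like tdlAdd() */
			parked[nParked++] = id;
			printf("batch %u parked on to-delete list\n", id);
			continue;
		}
		printf("batch %u deleted from store\n", id);
		++deqIDDel;
		for(j = 0 ; j < nParked ; ) {		/* like tdlPeek()/tdlPop() */
			if(parked[j] == deqIDDel) {
				printf("batch %u deleted from store (was parked)\n", parked[j]);
				parked[j] = parked[--nParked];
				++deqIDDel;
				j = 0;
			} else {
				++j;
			}
		}
	}
	return 0;	/* prints: 11 parked, 12 parked, then 10, 11, 12 deleted in order */
}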
+ * rgerhards, 2009-05-13 + */ +static inline rsRetVal +DeleteProcessedBatch(qqueue_t *pThis, batch_t *pBatch) +{ + int i; void *pUsr; + DEFiRet; + + ISOBJ_TYPE_assert(pThis, qqueue); + assert(pBatch != NULL); + + for(i = 0 ; i < pBatch->nElem ; ++i) { + pUsr = pBatch->pElem[i].pUsrp; + objDestruct(pUsr); + } + + iRet = DeleteBatchFromQStore(pThis, pBatch); + + pBatch->nElem = pBatch->nElemDeq = 0; /* reset batch */ + + RETiRet; +} + + +/* dequeue as many user pointers as are available, until we hit the configured + * upper limit of pointers. + * This must only be called when the queue mutex is LOOKED, otherwise serious + * malfunction will happen. + */ +static inline rsRetVal +DequeueConsumableElements(qqueue_t *pThis, wti_t *pWti, int *piRemainingQueueSize) +{ + int nDequeued; + int nDiscarded; + int nDeleted; int iQueueSize; - int bRunsDA; /* cache for early mutex release */ - - /* dequeue element (still protected from mutex) */ - iRet = qqueueDel(pThis, &pUsr); - qqueueChkPersist(pThis); - iQueueSize = qqueueGetOverallQueueSize(pThis); /* cache this for after mutex release */ - bRunsDA = pThis->bRunsDA; /* cache this for after mutex release */ - - /* We now need to save the user pointer for the cancel cleanup handler, BUT ONLY - * if we could successfully obtain a user pointer. Otherwise, we would bring the - * cancel cleanup handler into big troubles (and we did ;)). Note that we can - * NOT set the variable further below, as this may lead to an object leak. We - * may get cancelled before we reach that part of the code, so the only - * solution is to do it here. -- rgerhards, 2008-02-27 - */ - if(iRet == RS_RET_OK) { - pWti->pUsrp = pUsr; + void *pUsr; + rsRetVal localRet; + DEFiRet; + + nDeleted = pWti->batch.nElemDeq; + DeleteProcessedBatch(pThis, &pWti->batch); + + nDequeued = nDiscarded = 0; + while((iQueueSize = getLogicalQueueSize(pThis)) > 0 && nDequeued < pThis->iDeqBatchSize) { +dbgprintf("DequeueConsumableElements, index %d\n", nDequeued); + CHKiRet(qqueueDeq(pThis, &pUsr)); + + /* check if we should discard this element */ + localRet = qqueueChkDiscardMsg(pThis, pThis->iQueueSize, pThis->bRunsDA, pUsr); + if(localRet == RS_RET_QUEUE_FULL) { + ++nDiscarded; + continue; + } else if(localRet != RS_RET_OK) { + ABORT_FINALIZE(localRet); + } + + /* all well, use this element */ + pWti->batch.pElem[nDequeued].pUsrp = pUsr; + pWti->batch.pElem[nDequeued].state = BATCH_STATE_RDY; + ++nDequeued; } + /* it is sufficient to persist only when the bulk of work is done */ + qqueueChkPersist(pThis, nDequeued+nDiscarded+nDeleted); + + pWti->batch.nElem = nDequeued; + pWti->batch.nElemDeq = nDequeued + nDiscarded; + pWti->batch.deqID = getNextDeqID(pThis); + *piRemainingQueueSize = iQueueSize; + +finalize_it: + RETiRet; +} + + +/* dequeue the queued object for the queue consumers. + * rgerhards, 2008-10-21 + * I made a radical change - we now dequeue multiple elements, and store these objects in + * an array of user pointers. We expect that this increases performance. + * rgerhards, 2009-04-22 + */ +static rsRetVal +DequeueConsumable(qqueue_t *pThis, wti_t *pWti) +{ + DEFiRet; + int iQueueSize = 0; /* keep the compiler happy... 
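/* Small worked example (illustrative) of the two batch counters filled in by
 * DequeueConsumableElements() above: nElem counts what the consumer will see,
 * while nElemDeq additionally counts messages discarded by qqueueChkDiscardMsg(),
 * because those were still logically dequeued and must later be removed from the
 * queue store by DeleteProcessedBatch()/DeleteBatchFromQStore().
 */
#include <assert.h>

int main(void)
{
	int iDeqBatchSize = 8;
	int nDequeued = 0, nDiscarded = 0;
	int i;

	for(i = 0 ; i < iDeqBatchSize ; ++i) {
		if(i == 2 || i == 5)		/* pretend these two exceed the discard mark */
			++nDiscarded;
		else
			++nDequeued;
	}
	assert(nDequeued == 6);			/* would become pWti->batch.nElem */
	assert(nDequeued + nDiscarded == 8);	/* would become pWti->batch.nElemDeq */
	return 0;
}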
*/ + + /* dequeue element batch (still protected from mutex) */ + iRet = DequeueConsumableElements(pThis, pWti, &iQueueSize); + /* awake some flow-controlled sources if we can do this right now */ /* TODO: this could be done better from a performance point of view -- do it only if * we have someone waiting for the condition (or only when we hit the watermark right * on the nail [exact value]) -- rgerhards, 2008-03-14 + * now that we dequeue batches of pointers, this is much less an issue... + * rgerhards, 2009-04-22 */ if(iQueueSize < pThis->iFullDlyMrk / 2) { pthread_cond_broadcast(&pThis->belowFullDlyWtrMrk); @@ -1460,37 +1667,15 @@ qqueueDequeueConsumable(qqueue_t *pThis, wti_t *pWti, int iCancelStateSave) pthread_cond_broadcast(&pThis->belowLightDlyWtrMrk); } - /* rgerhards, 2008-09-30: I reversed the order of cond_signal und mutex_unlock - * as of the pthreads recommendation on predictable scheduling behaviour. I don't see - * any problems caused by this, but I add this comment in case some will be seen - * in the next time. - */ + // TODO: MULTI: check physical queue size? pthread_cond_signal(&pThis->notFull); - d_pthread_mutex_unlock(pThis->mut); - pthread_setcancelstate(iCancelStateSave, NULL); /* WE ARE NO LONGER PROTECTED BY THE MUTEX */ - /* do actual processing (the lengthy part, runs in parallel) - * If we had a problem while dequeing, we do not call the consumer, - * but we otherwise ignore it. This is in the hopes that it will be - * self-healing. However, this is really not a good thing. - * rgerhards, 2008-01-03 - */ - if(iRet != RS_RET_OK) - FINALIZE; - - /* we are running in normal, non-disk-assisted mode do a quick check if we need to drain the queue. - * In DA mode, we do not discard any messages as we assume the disk subsystem is fast enough to - * provide real-time creation of spool files. - * Note: It is OK to use the cached iQueueSize here, because it does not hurt if it is slightly wrong. - */ - CHKiRet(qqueueChkDiscardMsg(pThis, iQueueSize, bRunsDA, pUsr)); - -finalize_it: if(iRet != RS_RET_OK && iRet != RS_RET_DISCARDMSG) { - dbgoprint((obj_t*) pThis, "error %d dequeueing element - ignoring, but strange things " + DBGOPRINT((obj_t*) pThis, "error %d dequeueing element - ignoring, but strange things " "may happen\n", iRet); } + RETiRet; } @@ -1533,7 +1718,7 @@ finalize_it: * but you get the idea from the code above. */ static rsRetVal -qqueueRateLimiter(qqueue_t *pThis) +RateLimiter(qqueue_t *pThis) { DEFiRet; int iDelay; @@ -1582,7 +1767,7 @@ qqueueRateLimiter(qqueue_t *pThis) } if(iDelay > 0) { - dbgoprint((obj_t*) pThis, "outside dequeue time window, delaying %d seconds\n", iDelay); + DBGOPRINT((obj_t*) pThis, "outside dequeue time window, delaying %d seconds\n", iDelay); srSleep(iDelay, 0); } @@ -1590,37 +1775,88 @@ qqueueRateLimiter(qqueue_t *pThis) } +/* This dequeues the next batch. + * rgerhards, 2009-05-20 + */ +static inline rsRetVal +DequeueForConsumer(qqueue_t *pThis, wti_t *pWti) +{ + DEFiRet; + + ISOBJ_TYPE_assert(pThis, qqueue); + ISOBJ_TYPE_assert(pWti, wti); + +dbgprintf("YYY: deqeueu for consumer"); + CHKiRet(DequeueConsumable(pThis, pWti)); + + if(pWti->batch.nElem == 0) + ABORT_FINALIZE(RS_RET_IDLE); + + +finalize_it: + RETiRet; +} + + +/* This is called when a batch is processed and the worker does not + * ask for another batch (e.g. 
because it is to be terminated) + * rgerhards, 2009-05-27 + */ +static rsRetVal +batchProcessed(qqueue_t *pThis, wti_t *pWti) +{ + DEFiRet; + + ISOBJ_TYPE_assert(pThis, qqueue); + ISOBJ_TYPE_assert(pWti, wti); +dbgprintf("XXX: batchProcessed deletes %d records\n", pWti->batch.nElemDeq); + + DeleteProcessedBatch(pThis, &pWti->batch); + qqueueChkPersist(pThis, pWti->batch.nElemDeq); + + RETiRet; +} + /* This is the queue consumer in the regular (non-DA) case. It is * protected by the queue mutex, but MUST release it as soon as possible. * rgerhards, 2008-01-21 */ static rsRetVal -qqueueConsumerReg(qqueue_t *pThis, wti_t *pWti, int iCancelStateSave) +ConsumerReg(qqueue_t *pThis, wti_t *pWti) { DEFiRet; ISOBJ_TYPE_assert(pThis, qqueue); ISOBJ_TYPE_assert(pWti, wti); - CHKiRet(qqueueDequeueConsumable(pThis, pWti, iCancelStateSave)); - CHKiRet(pThis->pConsumer(pThis->pUsr, pWti->pUsrp)); + CHKiRet(DequeueForConsumer(pThis, pWti)); + + /* we now have a non-idle batch of work, so we can release the queue mutex and process it */ + d_pthread_mutex_unlock(pThis->mut); + + CHKiRet(pThis->pConsumer(pThis->pUsr, &pWti->batch)); /* we now need to check if we should deliberately delay processing a bit * and, if so, do that. -- rgerhards, 2008-01-30 */ +//TODO: MULTIQUEUE: the following setting is no longer correct - need to think about how to do that... if(pThis->iDeqSlowdown) { - dbgoprint((obj_t*) pThis, "sleeping %d microseconds as requested by config params\n", + DBGOPRINT((obj_t*) pThis, "sleeping %d microseconds as requested by config params\n", pThis->iDeqSlowdown); srSleep(pThis->iDeqSlowdown / 1000000, pThis->iDeqSlowdown % 1000000); } + /* now we are done, but need to re-aquire the mutex */ + d_pthread_mutex_lock(pThis->mut); + finalize_it: +dbgprintf("XXX: regular consumer finished, iret=%d, szlog %d sz phys %d\n", iRet, getLogicalQueueSize(pThis), getPhysicalQueueSize(pThis)); RETiRet; } -/* This is a special consumer to feed the disk-queue in disk-assited mode. +/* This is a special consumer to feed the disk-queue in disk-assisted mode. * When active, our own queue more or less acts as a memory buffer to the disk. * So this consumer just needs to drain the memory queue and submit entries * to the disk queue. The disk queue will then call the actual consumer from @@ -1630,18 +1866,33 @@ finalize_it: * rgerhards, 2008-01-14 */ static rsRetVal -qqueueConsumerDA(qqueue_t *pThis, wti_t *pWti, int iCancelStateSave) +ConsumerDA(qqueue_t *pThis, wti_t *pWti) { + int i; DEFiRet; ISOBJ_TYPE_assert(pThis, qqueue); ISOBJ_TYPE_assert(pWti, wti); - CHKiRet(qqueueDequeueConsumable(pThis, pWti, iCancelStateSave)); - CHKiRet(qqueueEnqObj(pThis->pqDA, eFLOWCTL_NO_DELAY, pWti->pUsrp)); + CHKiRet(DequeueForConsumer(pThis, pWti)); + + /* we now have a non-idle batch of work, so we can release the queue mutex and process it */ + d_pthread_mutex_unlock(pThis->mut); + + /* iterate over returned results and enqueue them in DA queue */ + for(i = 0 ; i < pWti->batch.nElem ; i++) { + /* TODO: we must add a generic "addRef" mechanism, because the disk queue enqueue destructs + * the message. So far, we simply assume we always have msg_t, what currently is always the case. 
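/* Sketch (not part of the patch) of the locking pattern ConsumerReg() above
 * follows, and ConsumerDA() repeats: the queue mutex is held only while a batch
 * is taken off the shared queue state, released for the potentially slow consumer
 * call, and re-acquired before returning, since the caller expects to still hold
 * it. All names below are generic placeholders.
 */
#include <pthread.h>

typedef struct work work_t;	/* opaque stand-in for a dequeued batch */

static void
consumeOnce(pthread_mutex_t *pMut,
            work_t *(*takeLocked)(void),	/* must be called with pMut held */
            void (*consume)(work_t*))		/* slow part, runs without pMut */
{
	work_t *pWork;

	/* the caller already holds pMut, just as the queue's DoWork callback does */
	pWork = takeLocked();

	pthread_mutex_unlock(pMut);
	if(pWork != NULL)
		consume(pWork);			/* may block, write to disk, etc. */
	pthread_mutex_lock(pMut);		/* hand the lock back to the caller */
}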
+ * rgerhards, 2009-05-28 + */ + CHKiRet(qqueueEnqObj(pThis->pqDA, eFLOWCTL_NO_DELAY, (obj_t*)MsgAddRef((msg_t*)(pWti->batch.pElem[i].pUsrp)))); + } + + /* now we are done, but need to re-aquire the mutex */ + d_pthread_mutex_lock(pThis->mut); finalize_it: - dbgoprint((obj_t*) pThis, "DAConsumer returns with iRet %d\n", iRet); + DBGOPRINT((obj_t*) pThis, "DAConsumer returns with iRet %d\n", iRet); RETiRet; } @@ -1651,20 +1902,17 @@ finalize_it: * If we are a child, we have done our duty when the queue is empty. In that case, * we can terminate. * Version for the DA worker thread. NOTE: the pThis->bRunsDA is different from - * the DA queue + * the DA queue. + * If our queue is in destruction, we drain to the DA queue and so we shall not terminate + * until we have done so. */ -static int +static rsRetVal qqueueChkStopWrkrDA(qqueue_t *pThis) { - /* if our queue is in destruction, we drain to the DA queue and so we shall not terminate - * until we have done so. - */ - int bStopWrkr; - - BEGINfunc + DEFiRet; if(pThis->bEnqOnly) { - bStopWrkr = 1; + iRet = RS_RET_TERMINATE_WHEN_IDLE; } else { if(pThis->bRunsDA) { ASSERT(pThis->pqDA != NULL); @@ -1672,19 +1920,21 @@ qqueueChkStopWrkrDA(qqueue_t *pThis) && pThis->pqDA->sizeOnDiskMax > 0 && pThis->pqDA->tVars.disk.sizeOnDisk > pThis->pqDA->sizeOnDiskMax) { /* this queue can never grow, so we can give up... */ - bStopWrkr = 1; - } else if(qqueueGetOverallQueueSize(pThis) < pThis->iHighWtrMrk && pThis->bQueueStarted == 1) { - bStopWrkr = 1; - } else { - bStopWrkr = 0; + iRet = RS_RET_TERMINATE_NOW; + } else if(getPhysicalQueueSize(pThis) < pThis->iHighWtrMrk && pThis->bQueueStarted == 1) { +dbgprintf("XXX: terminate_NOW DA worker: queue size %d, high water mark %d\n", getPhysicalQueueSize(pThis), pThis->iHighWtrMrk); + iRet = RS_RET_TERMINATE_NOW; +RUNLOG_STR("XXX: re-start reg worker"); +qqueueAdviseMaxWorkers(pThis); +RUNLOG_STR("XXX: done re-start reg worker"); } } else { - bStopWrkr = 1; + // experimental iRet = RS_RET_TERMINATE_NOW; + ; } } - ENDfunc - return bStopWrkr; + RETiRet; } @@ -1695,38 +1945,50 @@ qqueueChkStopWrkrDA(qqueue_t *pThis) * Version for the regular worker thread. NOTE: the pThis->bRunsDA is different from * the DA queue */ -static int -qqueueChkStopWrkrReg(qqueue_t *pThis) +static rsRetVal +ChkStopWrkrReg(qqueue_t *pThis) +{ + DEFiRet; + if(pThis->bEnqOnly) { + iRet = RS_RET_TERMINATE_NOW; + } else if(pThis->pqParent != NULL) { + iRet = RS_RET_TERMINATE_WHEN_IDLE; + } + + RETiRet; +} + + +/* return the configured "deq max at once" interval + * rgerhards, 2009-04-22 + */ +static rsRetVal +GetDeqBatchSize(qqueue_t *pThis, int *pVal) { - return pThis->bEnqOnly || pThis->bRunsDA || (pThis->pqParent != NULL && qqueueGetOverallQueueSize(pThis) == 0); + DEFiRet; + assert(pVal != NULL); + *pVal = pThis->iDeqBatchSize; +if(pThis->pqParent != NULL) + *pVal = 16; + RETiRet; } /* must only be called when the queue mutex is locked, else results - * are not stable! DA queue version + * are not stable! DA worker version (pThis *is* the *main* queue, not DA!) */ static int qqueueIsIdleDA(qqueue_t *pThis) { - /* remember: iQueueSize is the DA queue size, not the main queue! 
*/ - /* TODO: I think we need just a single function for DA and non-DA mode - but I leave it for now as is */ - return(qqueueGetOverallQueueSize(pThis) == 0 || (pThis->bRunsDA && qqueueGetOverallQueueSize(pThis) <= pThis->iLowWtrMrk)); + return(getPhysicalQueueSize(pThis) <= pThis->iLowWtrMrk); } /* must only be called when the queue mutex is locked, else results - * are not stable! Regular queue version + * are not stable! Regular worker version. */ static int -qqueueIsIdleReg(qqueue_t *pThis) -{ -#if 0 /* enable for performance testing */ - int ret; - ret = qqueueGetOverallQueueSize(pThis) == 0 || (pThis->bRunsDA && qqueueGetOverallQueueSize(pThis) <= pThis->iLowWtrMrk); - if(ret) fprintf(stderr, "queue is idle\n"); - return ret; -#else - /* regular code! */ - return(qqueueGetOverallQueueSize(pThis) == 0 || (pThis->bRunsDA && qqueueGetOverallQueueSize(pThis) <= pThis->iLowWtrMrk)); -#endif +IsIdleReg(qqueue_t *pThis) +{ + return(getPhysicalQueueSize(pThis) == 0); } @@ -1744,14 +2006,13 @@ qqueueIsIdleReg(qqueue_t *pThis) * I am telling this, because I, too, always get confused by those... */ static rsRetVal -qqueueRegOnWrkrShutdown(qqueue_t *pThis) +RegOnWrkrShutdown(qqueue_t *pThis) { DEFiRet; ISOBJ_TYPE_assert(pThis, qqueue); if(pThis->pqParent != NULL) { - pThis->pqParent->bChildIsDone = 1; /* indicate we are done */ if(pThis->pqParent->pWtpDA != NULL) { /* see comment in function header from 2008-02-27 */ wtpAdviseMaxWorkers(pThis->pqParent->pWtpDA, 1); /* reactivate DA worker (always 1) */ } @@ -1761,28 +2022,11 @@ qqueueRegOnWrkrShutdown(qqueue_t *pThis) } -/* The following function is called when a regular queue worker starts up. We need this - * hook to indicate in the parent queue (if we are a child) that we are not done yet. - */ -static rsRetVal -qqueueRegOnWrkrStartup(qqueue_t *pThis) -{ - DEFiRet; - - ISOBJ_TYPE_assert(pThis, qqueue); - - if(pThis->pqParent != NULL) { - pThis->pqParent->bChildIsDone = 0; - } - - RETiRet; -} - - /* start up the queue - it must have been constructed and parameters defined * before. */ -rsRetVal qqueueStart(qqueue_t *pThis) /* this is the ConstructionFinalizer */ +rsRetVal +qqueueStart(qqueue_t *pThis) /* this is the ConstructionFinalizer */ { DEFiRet; rsRetVal iRetLocal; @@ -1804,7 +2048,7 @@ rsRetVal qqueueStart(qqueue_t *pThis) /* this is the ConstructionFinalizer */ pthread_mutex_init(pThis->mut, NULL); } else { /* child queue, we need to use parent's mutex */ - dbgoprint((obj_t*) pThis, "I am a child\n"); + DBGOPRINT((obj_t*) pThis, "I am a child\n"); pThis->mut = pThis->pqParent->mut; } @@ -1818,11 +2062,12 @@ rsRetVal qqueueStart(qqueue_t *pThis) /* this is the ConstructionFinalizer */ /* call type-specific constructor */ CHKiRet(pThis->qConstruct(pThis)); /* this also sets bIsDA */ - dbgoprint((obj_t*) pThis, "type %d, enq-only %d, disk assisted %d, maxFileSz %lld, qsize %d, child %d, " - "full delay %d, light delay %d starting\n", + DBGOPRINT((obj_t*) pThis, "type %d, enq-only %d, disk assisted %d, maxFileSz %lld, lqsize %d, pqsize %d, child %d, " + "full delay %d, light delay %d, deq batch size %d starting\n", pThis->qType, pThis->bEnqOnly, pThis->bIsDA, pThis->iMaxFileSize, - qqueueGetOverallQueueSize(pThis), pThis->pqParent == NULL ? 0 : 1, - pThis->iFullDlyMrk, pThis->iLightDlyMrk); + getLogicalQueueSize(pThis), getPhysicalQueueSize(pThis), + pThis->pqParent == NULL ? 
0 : 1, pThis->iFullDlyMrk, pThis->iLightDlyMrk, + pThis->iDeqBatchSize); if(pThis->qType == QUEUETYPE_DIRECT) FINALIZE; /* with direct queues, we are already finished... */ @@ -1833,13 +2078,13 @@ rsRetVal qqueueStart(qqueue_t *pThis) /* this is the ConstructionFinalizer */ lenBuf = snprintf((char*)pszBuf, sizeof(pszBuf), "%s:Reg", obj.GetName((obj_t*) pThis)); CHKiRet(wtpConstruct (&pThis->pWtpReg)); CHKiRet(wtpSetDbgHdr (pThis->pWtpReg, pszBuf, lenBuf)); - CHKiRet(wtpSetpfRateLimiter (pThis->pWtpReg, (rsRetVal (*)(void *pUsr)) qqueueRateLimiter)); - CHKiRet(wtpSetpfChkStopWrkr (pThis->pWtpReg, (rsRetVal (*)(void *pUsr, int)) qqueueChkStopWrkrReg)); - CHKiRet(wtpSetpfIsIdle (pThis->pWtpReg, (rsRetVal (*)(void *pUsr, int)) qqueueIsIdleReg)); - CHKiRet(wtpSetpfDoWork (pThis->pWtpReg, (rsRetVal (*)(void *pUsr, void *pWti, int)) qqueueConsumerReg)); - CHKiRet(wtpSetpfOnWorkerCancel (pThis->pWtpReg, (rsRetVal (*)(void *pUsr, void*pWti))qqueueConsumerCancelCleanup)); - CHKiRet(wtpSetpfOnWorkerStartup (pThis->pWtpReg, (rsRetVal (*)(void *pUsr)) qqueueRegOnWrkrStartup)); - CHKiRet(wtpSetpfOnWorkerShutdown(pThis->pWtpReg, (rsRetVal (*)(void *pUsr)) qqueueRegOnWrkrShutdown)); + CHKiRet(wtpSetpfRateLimiter (pThis->pWtpReg, (rsRetVal (*)(void *pUsr)) RateLimiter)); + CHKiRet(wtpSetpfChkStopWrkr (pThis->pWtpReg, (rsRetVal (*)(void *pUsr, int)) ChkStopWrkrReg)); + CHKiRet(wtpSetpfGetDeqBatchSize (pThis->pWtpReg, (rsRetVal (*)(void *pUsr, int*)) GetDeqBatchSize)); + CHKiRet(wtpSetpfIsIdle (pThis->pWtpReg, (rsRetVal (*)(void *pUsr, wtp_t*)) IsIdleReg)); + CHKiRet(wtpSetpfDoWork (pThis->pWtpReg, (rsRetVal (*)(void *pUsr, void *pWti)) ConsumerReg)); + CHKiRet(wtpSetpfObjProcessed (pThis->pWtpReg, (rsRetVal (*)(void *pUsr, wti_t *pWti)) batchProcessed)); + CHKiRet(wtpSetpfOnWorkerShutdown(pThis->pWtpReg, (rsRetVal (*)(void *pUsr)) RegOnWrkrShutdown)); CHKiRet(wtpSetpmutUsr (pThis->pWtpReg, pThis->mut)); CHKiRet(wtpSetpcondBusy (pThis->pWtpReg, &pThis->notEmpty)); CHKiRet(wtpSetiNumWorkerThreads (pThis->pWtpReg, pThis->iNumWorkerThreads)); @@ -1854,18 +2099,18 @@ rsRetVal qqueueStart(qqueue_t *pThis) /* this is the ConstructionFinalizer */ */ iRetLocal = qqueueHaveQIF(pThis); if(iRetLocal == RS_RET_OK) { - dbgoprint((obj_t*) pThis, "on-disk queue present, needs to be reloaded\n"); - qqueueInitDA(pThis, QUEUE_MODE_ENQDEQ, LOCK_MUTEX); /* initiate DA mode */ + DBGOPRINT((obj_t*) pThis, "on-disk queue present, needs to be reloaded\n"); + InitDA(pThis, QUEUE_MODE_ENQDEQ, LOCK_MUTEX); /* initiate DA mode */ bInitialized = 1; /* we are done */ } else { /* TODO: use logerror? -- rgerhards, 2008-01-16 */ - dbgoprint((obj_t*) pThis, "error %d trying to access on-disk queue files, starting without them. " + DBGOPRINT((obj_t*) pThis, "error %d trying to access on-disk queue files, starting without them. 
" "Some data may be lost\n", iRetLocal); } } - if(!bInitialized) { - dbgoprint((obj_t*) pThis, "queue starts up without (loading) any DA disk state (this is normal for the DA " + if(Debug && !bInitialized) { + DBGOPRINT((obj_t*) pThis, "queue starts up without (loading) any DA disk state (this is normal for the DA " "queue itself!)\n"); } @@ -1893,12 +2138,11 @@ static rsRetVal qqueuePersist(qqueue_t *pThis, int bIsCheckpoint) strm_t *psQIF = NULL; /* Queue Info File */ uchar pszQIFNam[MAXFNAME]; size_t lenQIFNam; - obj_t *pUsr; ASSERT(pThis != NULL); if(pThis->qType != QUEUETYPE_DISK) { - if(qqueueGetOverallQueueSize(pThis) > 0) { + if(getPhysicalQueueSize(pThis) > 0) { /* This error code is OK, but we will probably not implement this any time * The reason is that persistence happens via DA queues. But I would like to * leave the code as is, as we so have a hook in case we need one. @@ -1909,19 +2153,19 @@ static rsRetVal qqueuePersist(qqueue_t *pThis, int bIsCheckpoint) FINALIZE; /* if the queue is empty, we are happy and done... */ } - dbgoprint((obj_t*) pThis, "persisting queue to disk, %d entries...\n", qqueueGetOverallQueueSize(pThis)); + DBGOPRINT((obj_t*) pThis, "persisting queue to disk, %d entries...\n", getPhysicalQueueSize(pThis)); /* Construct file name */ lenQIFNam = snprintf((char*)pszQIFNam, sizeof(pszQIFNam) / sizeof(uchar), "%s/%s.qi", (char*) glbl.GetWorkDir(), (char*)pThis->pszFilePrefix); - if((bIsCheckpoint != QUEUE_CHECKPOINT) && (qqueueGetOverallQueueSize(pThis) == 0)) { + if((bIsCheckpoint != QUEUE_CHECKPOINT) && (getPhysicalQueueSize(pThis) == 0)) { if(pThis->bNeedDelQIF) { unlink((char*)pszQIFNam); pThis->bNeedDelQIF = 0; } /* indicate spool file needs to be deleted */ - CHKiRet(strm.SetbDeleteOnClose(pThis->tVars.disk.pRead, 1)); + CHKiRet(strm.SetbDeleteOnClose(pThis->tVars.disk.pReadDel, 1)); FINALIZE; /* nothing left to do, so be happy */ } @@ -1940,29 +2184,19 @@ static rsRetVal qqueuePersist(qqueue_t *pThis, int bIsCheckpoint) */ CHKiRet(obj.BeginSerializePropBag(psQIF, (obj_t*) pThis)); objSerializeSCALAR(psQIF, iQueueSize, INT); - objSerializeSCALAR(psQIF, iUngottenObjs, INT); objSerializeSCALAR(psQIF, tVars.disk.sizeOnDisk, INT64); objSerializeSCALAR(psQIF, tVars.disk.bytesRead, INT64); CHKiRet(obj.EndSerialize(psQIF)); - /* now we must persist all objects on the ungotten queue - they can not go to - * to the regular files. -- rgerhards, 2008-01-29 - */ - while(pThis->iUngottenObjs > 0) { - CHKiRet(qqueueGetUngottenObj(pThis, &pUsr)); - CHKiRet((objSerialize(pUsr))(pUsr, psQIF)); - objDestruct(pUsr); - } - /* now persist the stream info */ CHKiRet(strm.Serialize(pThis->tVars.disk.pWrite, psQIF)); - CHKiRet(strm.Serialize(pThis->tVars.disk.pRead, psQIF)); + CHKiRet(strm.Serialize(pThis->tVars.disk.pReadDel, psQIF)); /* tell the input file object that it must not delete the file on close if the queue * is non-empty - but only if we are not during a simple checkpoint */ if(bIsCheckpoint != QUEUE_CHECKPOINT) { - CHKiRet(strm.SetbDeleteOnClose(pThis->tVars.disk.pRead, 0)); + CHKiRet(strm.SetbDeleteOnClose(pThis->tVars.disk.pReadDel, 0)); } /* we have persisted the queue object. So whenever it comes to an empty queue, @@ -1979,20 +2213,63 @@ finalize_it: /* check if we need to persist the current queue info. If an - * error occurs, thus should be ignored by caller (but we still + * error occurs, this should be ignored by caller (but we still * abide to our regular call interface)... 
* rgerhards, 2008-01-13 + * nUpdates is the number of updates since the last call to this function. + * It may be > 1 due to batches. -- rgerhards, 2009-05-12 */ -static rsRetVal qqueueChkPersist(qqueue_t *pThis) +static rsRetVal qqueueChkPersist(qqueue_t *pThis, int nUpdates) { + DEFiRet; ISOBJ_TYPE_assert(pThis, qqueue); + assert(nUpdates >= 0); - if(pThis->iPersistUpdCnt && ++pThis->iUpdsSincePersist >= pThis->iPersistUpdCnt) { + if(nUpdates == 0) + FINALIZE; + + pThis->iUpdsSincePersist += nUpdates; + if(pThis->iPersistUpdCnt && pThis->iUpdsSincePersist >= pThis->iPersistUpdCnt) { qqueuePersist(pThis, QUEUE_CHECKPOINT); pThis->iUpdsSincePersist = 0; } - return RS_RET_OK; +finalize_it: + RETiRet; +} + + +/* persist a queue with all data elements to disk - this is used to handle + * bSaveOnShutdown. We utilize the DA worker to do this. This must only + * be called after all workers have been shut down and if bSaveOnShutdown + * is actually set. Note that this function may potentially run long, + * depending on the queue configuration (e.g. store on remote machine). + * rgerhards, 2009-05-26 + */ +static inline rsRetVal +DoSaveOnShutdown(qqueue_t *pThis) +{ + struct timespec tTimeout; + rsRetVal iRetLocal; + DEFiRet; + + ISOBJ_TYPE_assert(pThis, qqueue); + + InitDA(pThis, QUEUE_MODE_ENQONLY, LOCK_MUTEX); /* switch to DA mode */ +dbgprintf("after InitDA, queue log %d, phys %d\n", getLogicalQueueSize(pThis), getPhysicalQueueSize(pThis)); + /* make sure we do not timeout before we are done */ + DBGOPRINT((obj_t*) pThis, "bSaveOnShutdown configured, infinite timeout set\n"); + timeoutComp(&tTimeout, QUEUE_TIMEOUT_ETERNAL); + /* and run the primary queue's DA worker to drain the queue */ + iRetLocal = wtpShutdownAll(pThis->pWtpDA, wtpState_SHUTDOWN, &tTimeout); + DBGOPRINT((obj_t*) pThis, "end queue persistence run, iRet %d, queue size log %d, phys %d\n", + iRetLocal, getLogicalQueueSize(pThis), getPhysicalQueueSize(pThis)); + if(iRetLocal != RS_RET_OK) { + DBGOPRINT((obj_t*) pThis, "unexpected iRet state %d after trying to shut down primary queue in disk save mode, " + "continuing, but results are unpredictable\n", iRetLocal); + } + + RETiRet; } @@ -2001,14 +2278,24 @@ BEGINobjDestruct(qqueue) /* be sure to specify the object type also in END and C CODESTARTobjDestruct(qqueue) pThis->bQueueInDestruction = 1; /* indicate we are in destruction (modifies some behaviour) */ - /* shut down all workers (handles *all* of the persistence logic) - * See function head comment of queueShutdownWorkers () on why we don't call it - * We also do not need to shutdown workers when we are in enqueue-only mode or we are a + /* shut down all workers + * We do not need to shutdown workers when we are in enqueue-only mode or we are a * direct queue - because in both cases we have none... ;) * with a child! -- rgerhards, 2008-01-28 */ if(pThis->qType != QUEUETYPE_DIRECT && !pThis->bEnqOnly && pThis->pqParent == NULL) - qqueueShutdownWorkers(pThis); + ShutdownWorkers(pThis); + + /* now all workers are terminated. Messages may exist. Also, some logically dequeued + * messages may never have been processed because their worker was terminated. So + * we need to reset the logical dequeue pointer, persist the queue if configured to do + * so and then destruct everything. 
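The reworked qqueueChkPersist() above now takes the number of updates as a parameter, so a whole batch can be accounted for with a single call. A minimal standalone sketch of that counter pattern, with simplified names and a hypothetical persist_state() standing in for the real on-disk persistence routine:

    typedef struct chkpt_queue_s {
        int iPersistUpdCnt;    /* configuration: checkpoint every N updates, 0 disables */
        int iUpdsSincePersist; /* updates seen since the last checkpoint */
    } chkpt_queue_t;

    /* hypothetical stand-in for the real on-disk persistence routine */
    static void persist_state(chkpt_queue_t *q) { (void) q; }

    /* nUpdates may be > 1 because batches report all their elements at once */
    static void chk_persist(chkpt_queue_t *q, int nUpdates)
    {
        if(nUpdates == 0 || q->iPersistUpdCnt == 0)
            return;                       /* nothing new, or checkpointing disabled */
        q->iUpdsSincePersist += nUpdates;
        if(q->iUpdsSincePersist >= q->iPersistUpdCnt) {
            persist_state(q);             /* write the queue info ("checkpoint") */
            q->iUpdsSincePersist = 0;     /* restart the interval */
        }
    }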
-- rgerhards, 2009-05-26 + */ + CHKiRet(pThis->qUnDeqAll(pThis)); + + if(pThis->bIsDA && getPhysicalQueueSize(pThis) > 0 && pThis->bSaveOnShutdown) { + CHKiRet(DoSaveOnShutdown(pThis)); + } /* finally destruct our (regular) worker thread pool * Note: currently pWtpReg is never NULL, but if we optimize our logic, this may happen, @@ -2044,7 +2331,7 @@ CODESTARTobjDestruct(qqueue) * if need arises (what I doubt...) -- rgerhards, 2008-01-25 */ CHKiRet_Hdlr(qqueuePersist(pThis, QUEUE_NO_CHECKPOINT)) { - dbgoprint((obj_t*) pThis, "error %d persisting queue - data lost!\n", iRet); + DBGOPRINT((obj_t*) pThis, "error %d persisting queue - data lost!\n", iRet); } /* finally, clean up some simple things... */ @@ -2063,11 +2350,8 @@ CODESTARTobjDestruct(qqueue) /* type-specific destructor */ iRet = pThis->qDestruct(pThis); - if(pThis->pszFilePrefix != NULL) - free(pThis->pszFilePrefix); - - if(pThis->pszSpoolDir != NULL) - free(pThis->pszSpoolDir); + free(pThis->pszFilePrefix); + free(pThis->pszSpoolDir); ENDobjDestruct(qqueue) @@ -2081,8 +2365,8 @@ qqueueSetFilePrefix(qqueue_t *pThis, uchar *pszPrefix, size_t iLenPrefix) { DEFiRet; - if(pThis->pszFilePrefix != NULL) - free(pThis->pszFilePrefix); + free(pThis->pszFilePrefix); + pThis->pszFilePrefix = NULL; if(pszPrefix == NULL) /* just unset the prefix! */ ABORT_FINALIZE(RS_RET_OK); @@ -2117,111 +2401,7 @@ finalize_it: } -/* enqueue a new user data element - * Enqueues the new element and awakes worker thread. - */ -rsRetVal -qqueueEnqObj(qqueue_t *pThis, flowControl_t flowCtlType, void *pUsr) -{ - DEFiRet; - int iCancelStateSave; - struct timespec t; - - ISOBJ_TYPE_assert(pThis, qqueue); - - /* first check if we need to discard this message (which will cause CHKiRet() to exit) - * rgerhards, 2008-10-07: It is OK to do this outside of mutex protection. The iQueueSize - * and bRunsDA parameters may not reflect the correct settings here, but they are - * "good enough" in the sense that they can be used to drive the decision. Valgrind's - * threading tools may point this access to be an error, but this is done - * intentional. I do not see this causes problems to us. - */ - CHKiRet(qqueueChkDiscardMsg(pThis, pThis->iQueueSize, pThis->bRunsDA, pUsr)); - - /* Please note that this function is not cancel-safe and consequently - * sets the calling thread's cancelibility state to PTHREAD_CANCEL_DISABLE - * during its execution. If that is not done, race conditions occur if the - * thread is canceled (most important use case is input module termination). - * rgerhards, 2008-01-08 - */ - if(pThis->qType != QUEUETYPE_DIRECT) { - pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &iCancelStateSave); - d_pthread_mutex_lock(pThis->mut); - } - - /* then check if we need to add an assistance disk queue */ - if(pThis->bIsDA) - CHKiRet(qqueueChkStrtDA(pThis)); - - /* handle flow control - * There are two different flow control mechanisms: basic and advanced flow control. - * Basic flow control has always been implemented and protects the queue structures - * in that it makes sure no more data is enqueued than the queue is configured to - * support. Enhanced flow control is being added today. There are some sources which - * can easily be stopped, e.g. a file reader. This is the case because it is unlikely - * that blocking those sources will have negative effects (after all, the file is - * continued to be written). Other sources can somewhat be blocked (e.g. 
the kernel - * log reader or the local log stream reader): in general, nothing is lost if messages - * from these sources are not picked up immediately. HOWEVER, they can not block for - * an extended period of time, as this either causes message loss or - even worse - some - * other bad effects (e.g. unresponsive system in respect to the main system log socket). - * Finally, there are some (few) sources which can not be blocked at all. UDP syslog is - * a prime example. If a UDP message is not received, it is simply lost. So we can't - * do anything against UDP sockets that come in too fast. The core idea of advanced - * flow control is that we take into account the different natures of the sources and - * select flow control mechanisms that fit these needs. This also means, in the end - * result, that non-blockable sources like UDP syslog receive priority in the system. - * It's a side effect, but a good one ;) -- rgerhards, 2008-03-14 - */ - if(flowCtlType == eFLOWCTL_FULL_DELAY) { - while(pThis->iQueueSize >= pThis->iFullDlyMrk) { - dbgoprint((obj_t*) pThis, "enqueueMsg: FullDelay mark reached for full delayable message - blocking.\n"); - pthread_cond_wait(&pThis->belowFullDlyWtrMrk, pThis->mut); /* TODO error check? But what do then? */ - } - } else if(flowCtlType == eFLOWCTL_LIGHT_DELAY) { - if(pThis->iQueueSize >= pThis->iLightDlyMrk) { - dbgoprint((obj_t*) pThis, "enqueueMsg: LightDelay mark reached for light delayable message - blocking a bit.\n"); - timeoutComp(&t, 1000); /* 1000 millisconds = 1 second TODO: make configurable */ - pthread_cond_timedwait(&pThis->belowLightDlyWtrMrk, pThis->mut, &t); /* TODO error check? But what do then? */ - } - } - - /* from our regular flow control settings, we are now ready to enqueue the object. - * However, we now need to do a check if the queue permits to add more data. If that - * is not the case, basic flow control enters the field, which means we wait for - * the queue to become ready or drop the new message. -- rgerhards, 2008-03-14 - */ - while( (pThis->iMaxQueueSize > 0 && pThis->iQueueSize >= pThis->iMaxQueueSize) - || (pThis->qType == QUEUETYPE_DISK && pThis->sizeOnDiskMax != 0 - && pThis->tVars.disk.sizeOnDisk > pThis->sizeOnDiskMax)) { - dbgoprint((obj_t*) pThis, "enqueueMsg: queue FULL - waiting to drain.\n"); - timeoutComp(&t, pThis->toEnq); - if(pthread_cond_timedwait(&pThis->notFull, pThis->mut, &t) != 0) { - dbgoprint((obj_t*) pThis, "enqueueMsg: cond timeout, dropping message!\n"); - objDestruct(pUsr); - ABORT_FINALIZE(RS_RET_QUEUE_FULL); - } - } - - /* and finally enqueue the message */ - CHKiRet(qqueueAdd(pThis, pUsr)); - qqueueChkPersist(pThis); - -finalize_it: - if(pThis->qType != QUEUETYPE_DIRECT) { - /* make sure at least one worker is running. */ - qqueueAdviseMaxWorkers(pThis); - /* and release the mutex */ - d_pthread_mutex_unlock(pThis->mut); - pthread_setcancelstate(iCancelStateSave, NULL); - dbgoprint((obj_t*) pThis, "EnqueueMsg advised worker start\n"); - } - - RETiRet; -} - - -/* enqueue a single data object. This currently is a helper to qqueueMultiEnqObj. +/* enqueue a single data object. * Note that the queue mutex MUST already be locked when this function is called. 
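The three flow-control tiers described in the comment above (fully delayable, lightly delayable, non-delayable sources) boil down to a blocking wait, a bounded wait and no wait at all. A compressed sketch of that decision; the structure and helper names are invented for the example, and the one-second light-delay value mirrors the hard-coded 1000 milliseconds above:

    #include <pthread.h>
    #include <time.h>

    typedef enum { FLOW_FULL_DELAY, FLOW_LIGHT_DELAY, FLOW_NO_DELAY } flowType_t;

    typedef struct fcq_s {
        pthread_mutex_t mut;
        pthread_cond_t  belowFullDlyMrk;
        pthread_cond_t  belowLightDlyMrk;
        int size;          /* current number of queued elements */
        int fullDlyMrk;    /* watermark for fully delayable sources */
        int lightDlyMrk;   /* watermark for lightly delayable sources */
    } fcq_t;

    /* absolute deadline "now + ms", as pthread_cond_timedwait() expects */
    static void deadline_ms(struct timespec *t, long ms)
    {
        clock_gettime(CLOCK_REALTIME, t);
        t->tv_sec  += ms / 1000;
        t->tv_nsec += (ms % 1000) * 1000000L;
        if(t->tv_nsec >= 1000000000L) { t->tv_sec++; t->tv_nsec -= 1000000000L; }
    }

    /* must be called with q->mut held, exactly like the code above */
    static void flow_control(fcq_t *q, flowType_t f)
    {
        struct timespec t;

        if(f == FLOW_FULL_DELAY) {
            /* e.g. a file reader: block until the queue drains below the mark */
            while(q->size >= q->fullDlyMrk)
                pthread_cond_wait(&q->belowFullDlyMrk, &q->mut);
        } else if(f == FLOW_LIGHT_DELAY && q->size >= q->lightDlyMrk) {
            /* e.g. the kernel log reader: wait at most one second, then go on */
            deadline_ms(&t, 1000);
            pthread_cond_timedwait(&q->belowLightDlyMrk, &q->mut, &t);
        }
        /* non-delayable sources (e.g. UDP) fall straight through */
    }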
* rgerhards, 2009-06-16 */ @@ -2237,7 +2417,7 @@ doEnqSingleObj(qqueue_t *pThis, flowControl_t flowCtlType, void *pUsr) /* then check if we need to add an assistance disk queue */ if(pThis->bIsDA) - CHKiRet(qqueueChkStrtDA(pThis)); + CHKiRet(ChkStrtDA(pThis)); /* handle flow control * There are two different flow control mechanisms: basic and advanced flow control. @@ -2261,12 +2441,12 @@ doEnqSingleObj(qqueue_t *pThis, flowControl_t flowCtlType, void *pUsr) */ if(flowCtlType == eFLOWCTL_FULL_DELAY) { while(pThis->iQueueSize >= pThis->iFullDlyMrk) { - dbgoprint((obj_t*) pThis, "enqueueMsg: FullDelay mark reached for full delayable message - blocking.\n"); + DBGOPRINT((obj_t*) pThis, "enqueueMsg: FullDelay mark reached for full delayable message - blocking.\n"); pthread_cond_wait(&pThis->belowFullDlyWtrMrk, pThis->mut); /* TODO error check? But what do then? */ } } else if(flowCtlType == eFLOWCTL_LIGHT_DELAY) { if(pThis->iQueueSize >= pThis->iLightDlyMrk) { - dbgoprint((obj_t*) pThis, "enqueueMsg: LightDelay mark reached for light delayable message - blocking a bit.\n"); + DBGOPRINT((obj_t*) pThis, "enqueueMsg: LightDelay mark reached for light delayable message - blocking a bit.\n"); timeoutComp(&t, 1000); /* 1000 millisconds = 1 second TODO: make configurable */ pthread_cond_timedwait(&pThis->belowLightDlyWtrMrk, pThis->mut, &t); /* TODO error check? But what do then? */ } @@ -2280,10 +2460,10 @@ doEnqSingleObj(qqueue_t *pThis, flowControl_t flowCtlType, void *pUsr) while( (pThis->iMaxQueueSize > 0 && pThis->iQueueSize >= pThis->iMaxQueueSize) || (pThis->qType == QUEUETYPE_DISK && pThis->sizeOnDiskMax != 0 && pThis->tVars.disk.sizeOnDisk > pThis->sizeOnDiskMax)) { - dbgoprint((obj_t*) pThis, "enqueueMsg: queue FULL - waiting to drain.\n"); + DBGOPRINT((obj_t*) pThis, "enqueueMsg: queue FULL - waiting to drain.\n"); timeoutComp(&t, pThis->toEnq); if(pthread_cond_timedwait(&pThis->notFull, pThis->mut, &t) != 0) { - dbgoprint((obj_t*) pThis, "enqueueMsg: cond timeout, dropping message!\n"); + DBGOPRINT((obj_t*) pThis, "enqueueMsg: cond timeout, dropping message!\n"); objDestruct(pUsr); ABORT_FINALIZE(RS_RET_QUEUE_FULL); } @@ -2291,7 +2471,6 @@ doEnqSingleObj(qqueue_t *pThis, flowControl_t flowCtlType, void *pUsr) /* and finally enqueue the message */ CHKiRet(qqueueAdd(pThis, pUsr)); - qqueueChkPersist(pThis); // TODO: optimize, do in outer function! (but we need parts from v5?) finalize_it: RETiRet; @@ -2325,14 +2504,51 @@ dbgprintf("queueMultiEnq: %d\n", i); CHKiRet(doEnqSingleObj(pThis, pMultiSub->ppMsgs[i]->flowCtlType, (void*)pMultiSub->ppMsgs[i])); } + qqueueChkPersist(pThis, pMultiSub->nElem); + +finalize_it: + if(pThis->qType != QUEUETYPE_DIRECT) { + /* make sure at least one worker is running. */ + qqueueAdviseMaxWorkers(pThis); + /* and release the mutex */ + d_pthread_mutex_unlock(pThis->mut); + pthread_setcancelstate(iCancelStateSave, NULL); + DBGOPRINT((obj_t*) pThis, "MultiEnqObj advised worker start\n"); + } + + RETiRet; +} + + +/* enqueue a new user data element + * Enqueues the new element and awakes worker thread. 
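qqueueMultiEnqObj() above amortizes the expensive steps over a whole submission: the mutex is taken once, every element is enqueued, and the checkpoint test runs a single time for all of them. A rough standalone sketch of that shape; the stub helpers are placeholders, not the real queue functions:

    #include <pthread.h>

    typedef struct batchq_s {
        pthread_mutex_t mut;
        /* ... queue storage ... */
    } batchq_t;

    /* placeholder helpers, not the real queue functions */
    static void enq_single(batchq_t *q, void *obj) { (void) q; (void) obj; }
    static void chk_persist(batchq_t *q, int n)    { (void) q; (void) n; }
    static void advise_workers(batchq_t *q)        { (void) q; }

    /* enqueue a whole submission under one lock; cancellation is disabled while
     * the mutex is held so a cancelled input thread cannot leave it locked */
    static void multi_enq(batchq_t *q, void **objs, int n)
    {
        int i, cancelState;

        pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cancelState);
        pthread_mutex_lock(&q->mut);

        for(i = 0 ; i < n ; ++i)
            enq_single(q, objs[i]);
        chk_persist(q, n);            /* one checkpoint test for the whole batch */

        advise_workers(q);            /* make sure at least one worker is running */
        pthread_mutex_unlock(&q->mut);
        pthread_setcancelstate(cancelState, NULL);
    }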
+ */ +rsRetVal +qqueueEnqObj(qqueue_t *pThis, flowControl_t flowCtlType, void *pUsr) +{ + DEFiRet; + int iCancelStateSave; + + ISOBJ_TYPE_assert(pThis, qqueue); + + if(pThis->qType != QUEUETYPE_DIRECT) { + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &iCancelStateSave); + d_pthread_mutex_lock(pThis->mut); + } + + CHKiRet(doEnqSingleObj(pThis, flowCtlType, pUsr)); + + qqueueChkPersist(pThis, 1); + finalize_it: if(pThis->qType != QUEUETYPE_DIRECT) { /* make sure at least one worker is running. */ qqueueAdviseMaxWorkers(pThis); +dbgprintf("YYY: call advise with mutex %p locked \n", pThis->mut); /* and release the mutex */ d_pthread_mutex_unlock(pThis->mut); pthread_setcancelstate(iCancelStateSave, NULL); - dbgoprint((obj_t*) pThis, "MultiEnqObj advised worker start\n"); + DBGOPRINT((obj_t*) pThis, "EnqueueMsg advised worker start\n"); } RETiRet; @@ -2348,7 +2564,7 @@ finalize_it: * rgerhards, 2008-01-16 */ static rsRetVal -qqueueSetEnqOnly(qqueue_t *pThis, int bEnqOnly, int bLockMutex) +SetEnqOnly(qqueue_t *pThis, int bEnqOnly, int bLockMutex) { DEFiRet; DEFVARS_mutexProtection; @@ -2370,7 +2586,7 @@ qqueueSetEnqOnly(qqueue_t *pThis, int bEnqOnly, int bLockMutex) if(bEnqOnly == 1) { /* switch to enqueue-only mode */ /* this means we need to terminate all workers - that's it... */ - dbgoprint((obj_t*) pThis, "switching to enqueue-only mode, terminating all worker threads\n"); + DBGOPRINT((obj_t*) pThis, "switching to enqueue-only mode, terminating all worker threads\n"); if(pThis->pWtpReg != NULL) wtpWakeupAllWrkr(pThis->pWtpReg); if(pThis->pWtpDA != NULL) @@ -2410,6 +2626,7 @@ DEFpropSetMeth(qqueue, iMinMsgsPerWrkr, int) DEFpropSetMeth(qqueue, bSaveOnShutdown, int) DEFpropSetMeth(qqueue, pUsr, void*) DEFpropSetMeth(qqueue, iDeqSlowdown, int) +DEFpropSetMeth(qqueue, iDeqBatchSize, int) DEFpropSetMeth(qqueue, sizeOnDiskMax, int64) @@ -2428,8 +2645,6 @@ static rsRetVal qqueueSetProperty(qqueue_t *pThis, var_t *pProp) if(isProp("iQueueSize")) { pThis->iQueueSize = pProp->val.num; - } else if(isProp("iUngottenObjs")) { - pThis->iUngottenObjs = pProp->val.num; } else if(isProp("tVars.disk.sizeOnDisk")) { pThis->tVars.disk.sizeOnDisk = pProp->val.num; } else if(isProp("tVars.disk.bytesRead")) { diff --git a/runtime/queue.h b/runtime/queue.h index 1d82d8d9..73c62b52 100644 --- a/runtime/queue.h +++ b/runtime/queue.h @@ -27,8 +27,18 @@ #include <pthread.h> #include "obj.h" #include "wtp.h" +#include "batch.h" #include "stream.h" +/* support for the toDelete list */ +typedef struct toDeleteLst_s toDeleteLst_t; +struct toDeleteLst_s { + qDeqID deqID; + int nElemDeq; /* numbe of elements that were dequeued and as such must now be discarded */ + struct toDeleteLst_s *pNext; +}; + + /* queue types */ typedef enum { QUEUETYPE_FIXED_ARRAY = 0,/* a simple queue made out of a fixed (initially malloced) array fast but memoryhog */ @@ -44,20 +54,11 @@ typedef struct qLinkedList_S { } qLinkedList_t; -typedef struct qWrkThrd_s { - pthread_t thrdID; /* thread ID */ - qWrkCmd_t tCurrCmd; /* current command to be carried out by worker */ - obj_t *pUsr; /* current user object being processed (or NULL if none) */ - struct queue_s *pQueue; /* my queue (important if only the work thread instance is passed! 
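The toDeleteLst_s type introduced in queue.h above carries a dequeue-order ID plus an element count per finished batch. One way such a list can be used, purely as an illustration and not necessarily the exact algorithm the patch implements, is to force physical deletion to happen in dequeue order even when batches complete out of order:

    #include <stdlib.h>

    typedef unsigned long long qDeqID;   /* 64-bit dequeue order ID, as in rsyslog.h */

    typedef struct toDeleteLst_s {
        qDeqID deqID;                 /* dequeue-order ID of the finished batch */
        int    nElemDeq;              /* number of elements that batch had dequeued */
        struct toDeleteLst_s *pNext;
    } toDeleteLst_t;

    typedef struct delq_s {
        toDeleteLst_t *toDeleteLst;   /* finished batches not yet deletable in order */
        qDeqID deqIDDel;              /* next ID whose elements may be deleted */
    } delq_t;

    /* hypothetical stand-in: physically remove n elements from the queue store */
    static void store_delete(delq_t *q, int n) { (void) q; (void) n; }

    /* a worker finished batch 'id' containing 'n' elements */
    static int batch_processed(delq_t *q, qDeqID id, int n)
    {
        toDeleteLst_t *pNew, **ppCurr;

        if(id == q->deqIDDel) {
            /* in order: delete now, then drain any parked successors */
            store_delete(q, n);
            q->deqIDDel++;
            while(q->toDeleteLst != NULL && q->toDeleteLst->deqID == q->deqIDDel) {
                toDeleteLst_t *p = q->toDeleteLst;
                store_delete(q, p->nElemDeq);
                q->deqIDDel++;
                q->toDeleteLst = p->pNext;
                free(p);
            }
        } else {
            /* out of order: park it on the list, sorted by deqID */
            if((pNew = malloc(sizeof(*pNew))) == NULL)
                return -1;
            pNew->deqID = id;
            pNew->nElemDeq = n;
            ppCurr = &q->toDeleteLst;
            while(*ppCurr != NULL && (*ppCurr)->deqID < id)
                ppCurr = &(*ppCurr)->pNext;
            pNew->pNext = *ppCurr;
            *ppCurr = pNew;
        }
        return 0;
    }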
*/ - int iThrd; /* my worker thread array index */ - pthread_cond_t condInitDone; /* signaled when the thread startup is done (once per thread existance) */ - pthread_mutex_t mut; -} qWrkThrd_t; /* type for queue worker threads */ - /* the queue object */ typedef struct queue_s { BEGINobjInstance; queueType_t qType; + int nLogDeq; /* number of elements currently logically dequeued */ bool bEnqOnly; /* does queue run in enqueue-only mode (1) or not (0)? */ bool bSaveOnShutdown;/* persists everthing on shutdown (if DA!)? 1-yes, 0-no */ bool bQueueStarted; /* has queueStart() been called on this queue? 1-yes, 0-no */ @@ -83,7 +84,9 @@ typedef struct queue_s { int toQShutdown; /* timeout for regular queue shutdown in ms */ int toActShutdown; /* timeout for long-running action shutdown in ms */ int toWrkShutdown; /* timeout for idle workers in ms, -1 means indefinite (0 is immediate) */ + toDeleteLst_t *toDeleteLst;/* this queue's to-delete list */ int toEnq; /* enqueue timeout */ + int iDeqBatchSize; /* max number of elements that shall be dequeued at once */ /* rate limiting settings (will be expanded) */ int iDeqSlowdown; /* slow down dequeue by specified nbr of microseconds */ /* end rate limiting */ @@ -97,18 +100,19 @@ typedef struct queue_s { * applied to detect user configuration errors (and tell me how should we detect what * the user really wanted...). -- rgerhards, 2008-04-02 */ - /* ane dequeue time window */ - rsRetVal (*pConsumer)(void *,void*); /* user-supplied consumer function for dequeued messages */ + /* end dequeue time window */ + rsRetVal (*pConsumer)(void *,batch_t*); /* user-supplied consumer function for dequeued messages */ /* calling interface for pConsumer: arg1 is the global user pointer from this structure, arg2 is the - * user pointer that was dequeued (actual sample: for actions, arg1 is the pAction and arg2 is pointer - * to message) - * rgerhards, 2008-01-28 + * user pointer array that was dequeued (actual sample: for actions, arg1 is the pAction and arg2 + * is pointer to an array of message message pointers) */ /* type-specific handlers (set during construction) */ rsRetVal (*qConstruct)(struct queue_s *pThis); rsRetVal (*qDestruct)(struct queue_s *pThis); rsRetVal (*qAdd)(struct queue_s *pThis, void *pUsr); - rsRetVal (*qDel)(struct queue_s *pThis, void **ppUsr); + rsRetVal (*qDeq)(struct queue_s *pThis, void **ppUsr); + rsRetVal (*qDel)(struct queue_s *pThis); + rsRetVal (*qUnDeqAll)(struct queue_s *pThis); /* end type-specific handler */ /* synchronization variables */ pthread_mutex_t mutThrdMgmt; /* mutex for the queue's thread management */ @@ -117,7 +121,6 @@ typedef struct queue_s { pthread_cond_t belowFullDlyWtrMrk; /* below eFLOWCTL_FULL_DELAY watermark */ pthread_cond_t belowLightDlyWtrMrk; /* below eFLOWCTL_FULL_DELAY watermark */ pthread_cond_t condDAReady;/* signalled when the DA queue is fully initialized and ready for processing */ - int bChildIsDone; /* set to 1 when the child DA queue has finished processing, 0 otherwise */ int bThrdStateChanged; /* at least one thread state has changed if 1 */ /* end sync variables */ /* the following variables are always present, because they @@ -132,32 +135,30 @@ typedef struct queue_s { int iNumberFiles; /* how many files make up the queue? 
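The split of the old qDel handler into qDeq (logical dequeue), qDel (physical delete) and qUnDeqAll is easiest to picture on the fixed-array type. A simplified sketch, assuming the three-index layout the patch gives that type; index arithmetic uses the usual one-slot-free ring-buffer convention and error handling is omitted:

    typedef struct farray_q_s {
        long deqhead;  /* next element to hand to a worker (logical dequeue) */
        long head;     /* oldest element not yet physically deleted */
        long tail;     /* next free slot for enqueueing */
        long max;      /* number of slots in pBuf (one stays free by convention) */
        void **pBuf;
    } farray_q_t;

    /* physical size: everything still occupying a slot, including in-flight batches */
    static long phys_size(farray_q_t *q) { return (q->tail - q->head + q->max) % q->max; }

    /* logical size: what is still available for dequeuing */
    static long log_size(farray_q_t *q) { return (q->tail - q->deqhead + q->max) % q->max; }

    /* hand out the next element but keep it in the store until its batch is done */
    static void *deq(farray_q_t *q)
    {
        void *p = q->pBuf[q->deqhead];
        q->deqhead = (q->deqhead + 1) % q->max;
        return p;
    }

    /* physically delete the oldest element once its batch has been processed */
    static void del(farray_q_t *q)
    {
        q->head = (q->head + 1) % q->max;
    }

    /* shutdown path: make everything that was handed out but never finished
     * available again by rolling the logical pointer back to the physical head */
    static void undeq_all(farray_q_t *q)
    {
        q->deqhead = q->head;
    }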
*/ int64 iMaxFileSize; /* max size for a single queue file */ int64 sizeOnDiskMax; /* maximum size on disk allowed */ + qDeqID deqIDAdd; /* next dequeue ID to use during add to queue store */ + qDeqID deqIDDel; /* queue store delete position */ int bIsDA; /* is this queue disk assisted? */ int bRunsDA; /* is this queue actually *running* disk assisted? */ struct queue_s *pqDA; /* queue for disk-assisted modes */ struct queue_s *pqParent;/* pointer to the parent (if this is a child queue) */ int bDAEnqOnly; /* EnqOnly setting for DA queue */ - /* some data elements for the queueUngetObj() functionality. This list should always be short - * and is always kept in memory - */ - qLinkedList_t *pUngetRoot; - qLinkedList_t *pUngetLast; - int iUngottenObjs; /* number of objects currently in the "ungotten" list */ /* now follow queueing mode specific data elements */ union { /* different data elements based on queue type (qType) */ struct { - long head, tail; + long deqhead, head, tail; void** pBuf; /* the queued user data structure */ } farray; struct { - qLinkedList_t *pRoot; + qLinkedList_t *pDeqRoot; + qLinkedList_t *pDelRoot; qLinkedList_t *pLast; } linklist; struct { int64 sizeOnDisk; /* current amount of disk space used */ int64 bytesRead; /* number of bytes read from current (undeleted!) file */ - strm_t *pWrite; /* current file to be written */ - strm_t *pRead; /* current file to be read */ + strm_t *pWrite; /* current file to be written */ + strm_t *pReadDeq; /* current file for dequeueing */ + strm_t *pReadDel; /* current file for deleting */ } disk; } tVars; } qqueue_t; @@ -184,7 +185,7 @@ rsRetVal qqueueStart(qqueue_t *pThis); rsRetVal qqueueSetMaxFileSize(qqueue_t *pThis, size_t iMaxFileSize); rsRetVal qqueueSetFilePrefix(qqueue_t *pThis, uchar *pszPrefix, size_t iLenPrefix); rsRetVal qqueueConstruct(qqueue_t **ppThis, queueType_t qType, int iWorkerThreads, - int iMaxQueueSize, rsRetVal (*pConsumer)(void*,void*)); + int iMaxQueueSize, rsRetVal (*pConsumer)(void*,batch_t*)); PROTOTYPEObjClassInit(qqueue); PROTOTYPEpropSetMeth(qqueue, iPersistUpdCnt, int); PROTOTYPEpropSetMeth(qqueue, bSyncQueueFiles, int); @@ -203,6 +204,7 @@ PROTOTYPEpropSetMeth(qqueue, bSaveOnShutdown, int); PROTOTYPEpropSetMeth(qqueue, pUsr, void*); PROTOTYPEpropSetMeth(qqueue, iDeqSlowdown, int); PROTOTYPEpropSetMeth(qqueue, sizeOnDiskMax, int64); +PROTOTYPEpropSetMeth(qqueue, iDeqBatchSize, int); #define qqueueGetID(pThis) ((unsigned long) pThis) #endif /* #ifndef QUEUE_H_INCLUDED */ diff --git a/runtime/rsyslog.h b/runtime/rsyslog.h index 27bea6bc..59e8458b 100644 --- a/runtime/rsyslog.h +++ b/runtime/rsyslog.h @@ -69,8 +69,24 @@ #endif +/* the rsyslog core provides information about present feature to plugins + * asking it. Below are feature-test macros which must be used to query + * features. Note that this must be powers of two, so that multiple queries + * can be combined. -- rgerhards, 2009-04-27 + */ +#define CORE_FEATURE_BATCHING 1 +/*#define CORE_FEATURE_whatever 2 ... and so on ... */ + +/* some universal fixed size integer defines ... */ +typedef long long int64; +typedef long long unsigned uint64; +typedef int64 number_t; /* type to use for numbers - TODO: maybe an autoconf option? */ +typedef char intTiny; /* 0..127! */ +typedef unsigned char uintTiny; /* 0..255! 
*/ + /* define some base data types */ typedef unsigned char uchar;/* get rid of the unhandy "unsigned char" */ +typedef struct aUsrp_s aUsrp_t; typedef struct thrdInfo thrdInfo_t; typedef struct obj_s obj_t; typedef struct ruleset_s ruleset_t; @@ -87,6 +103,7 @@ typedef struct nsd_gsspi_s nsd_gsspi_t; typedef struct nsd_nss_s nsd_nss_t; typedef struct nsdsel_ptcp_s nsdsel_ptcp_t; typedef struct nsdsel_gtls_s nsdsel_gtls_t; +typedef struct wti_s wti_t; typedef obj_t nsd_t; typedef obj_t nsdsel_t; typedef struct msg msg_t; @@ -102,18 +119,15 @@ typedef struct tcps_sess_s tcps_sess_t; typedef struct strmsrv_s strmsrv_t; typedef struct strms_sess_s strms_sess_t; typedef struct vmstk_s vmstk_t; +typedef struct batch_obj_s batch_obj_t; +typedef struct batch_s batch_t; +typedef struct wtp_s wtp_t; typedef rsRetVal (*prsf_t)(struct vmstk_s*, int); /* pointer to a RainerScript function */ +typedef uint64 qDeqID; /* queue Dequeue order ID. 32 bits is considered dangerously few */ typedef struct tcpLstnPortList_s tcpLstnPortList_t; // TODO: rename? typedef struct strmLstnPortList_s strmLstnPortList_t; // TODO: rename? -/* some universal 64 bit define... */ -typedef long long int64; -typedef long long unsigned uint64; -typedef int64 number_t; /* type to use for numbers - TODO: maybe an autoconf option? */ -typedef char intTiny; /* 0..127! */ -typedef uchar uintTiny; /* 0..255! */ - #ifdef __hpux typedef unsigned int u_int32_t; /* TODO: is this correct? */ typedef int socklen_t; @@ -369,15 +383,20 @@ enum rsRetVal_ /** return value. All methods return this if not specified oth RS_RET_VAR_NOT_FOUND = -2142, /**< variable not found */ RS_RET_EMPTY_MSG = -2143, /**< provided (raw) MSG is empty */ RS_RET_PEER_CLOSED_CONN = -2144, /**< remote peer closed connection (information, no error) */ + RS_RET_NO_SRCNAME_TPL = -2150, /**< sourcename template was not specified where one was needed (omudpspoof spoof addr) */ + RS_RET_HOST_NOT_SPECIFIED = -2151, /**< (target) host was not specified where it was needed */ + RS_RET_ERR_LIBNET_INIT = -2152, /**< error initializing libnet */ /* RainerScript error messages (range 1000.. 1999) */ RS_RET_SYSVAR_NOT_FOUND = 1001, /**< system variable could not be found (maybe misspelled) */ /* some generic error/status codes */ + RS_RET_OK = 0, /**< operation successful */ RS_RET_OK_DELETE_LISTENTRY = 1, /**< operation successful, but callee requested the deletion of an entry (special state) */ RS_RET_TERMINATE_NOW = 2, /**< operation successful, function is requested to terminate (mostly used with threads) */ RS_RET_NO_RUN = 3, /**< operation successful, but function does not like to be executed */ - RS_RET_OK = 0 /**< operation successful */ + RS_RET_IDLE = 4, /**< operation successful, but callee is idle (e.g. because queue is empty) */ + RS_RET_TERMINATE_WHEN_IDLE = 5 /**< operation successful, function is requested to terminate when idle */ }; /* some helpful macros to work with srRetVals. 
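Because the CORE_FEATURE_* macros are powers of two, a plugin can OR together the features it cares about and test the answer with a bit-wise AND. A hypothetical sketch; queryCoreFeatures() merely stands in for whatever query entry point the core exports, which is not part of this diff:

    #define CORE_FEATURE_BATCHING 1
    /* further features would use 2, 4, 8, ... so they can be OR'ed together */

    /* hypothetical query entry point: returns the subset of 'requested' bits
     * that the running core actually supports */
    static unsigned queryCoreFeatures(unsigned requested)
    {
        unsigned supported = CORE_FEATURE_BATCHING;   /* example core */
        return requested & supported;
    }

    static int coreSupportsBatching(void)
    {
        return (queryCoreFeatures(CORE_FEATURE_BATCHING) & CORE_FEATURE_BATCHING) != 0;
    }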
diff --git a/runtime/ruleset.c b/runtime/ruleset.c index d98b4217..5ac9a8fd 100644 --- a/runtime/ruleset.c +++ b/runtime/ruleset.c @@ -138,8 +138,10 @@ finalize_it: */ DEFFUNC_llExecFunc(processMsgDoRules) { + rsRetVal iRet; ISOBJ_TYPE_assert(pData, rule); - return rule.ProcessMsg((rule_t*) pData, (msg_t*) pParam); + iRet = rule.ProcessMsg((rule_t*) pData, (msg_t*) pParam); + return iRet; } @@ -159,8 +161,9 @@ processMsg(msg_t *pMsg) CHKiRet(llExecFunc(&pThis->llRules, processMsgDoRules, pMsg)); finalize_it: - if(iRet == RS_RET_DISCARDMSG) - iRet = RS_RET_OK; + + //if(iRet == RS_RET_DISCARDMSG) + //iRet = RS_RET_OK; RETiRet; } diff --git a/runtime/srUtils.h b/runtime/srUtils.h index 16766312..c4f73e16 100644 --- a/runtime/srUtils.h +++ b/runtime/srUtils.h @@ -110,30 +110,17 @@ rsRetVal getFileSize(uchar *pszName, off_t *pSize); /* some useful constants */ #define DEFVARS_mutexProtection\ - int iCancelStateSave; \ int bLockedOpIsLocked=0 #define BEGIN_MTX_PROTECTED_OPERATIONS(mut, bMustLock) \ if(bMustLock == LOCK_MUTEX) { \ - pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &iCancelStateSave); \ d_pthread_mutex_lock(mut); \ + assert(bLockedOpIsLocked == 0); \ bLockedOpIsLocked = 1; \ } #define END_MTX_PROTECTED_OPERATIONS(mut) \ if(bLockedOpIsLocked) { \ d_pthread_mutex_unlock(mut); \ - pthread_setcancelstate(iCancelStateSave, NULL); \ + bLockedOpIsLocked = 0; \ } -/* The unconditional versions of the macro always lock the mutex. They are preferred in - * complex scenarios, where the simple ones might get mixed up by multiple calls. - */ -#define DEFVARS_mutexProtection_uncond\ - int iCancelStateSave -#define BEGIN_MTX_PROTECTED_OPERATIONS_UNCOND(mut) \ - pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &iCancelStateSave); \ - d_pthread_mutex_lock(mut); -#define END_MTX_PROTECTED_OPERATIONS_UNCOND(mut) \ - d_pthread_mutex_unlock(mut); \ - pthread_setcancelstate(iCancelStateSave, NULL); - #endif diff --git a/runtime/stream.c b/runtime/stream.c index ac90df28..58f16cce 100644 --- a/runtime/stream.c +++ b/runtime/stream.c @@ -253,7 +253,7 @@ static rsRetVal strmOpenFile(strm_t *pThis) pThis->pszFName, pThis->lenFName, pThis->iCurrFNum, pThis->iFileNumDigits)); } else { if(pThis->pszDir == NULL) { - if((pThis->pszCurrFName = (uchar*) strdup((char*) pThis->pszFName)) == NULL) + if((pThis->pszCurrFName = ustrdup(pThis->pszFName)) == NULL) ABORT_FINALIZE(RS_RET_OUT_OF_MEMORY); } else { CHKiRet(genFileName(&pThis->pszCurrFName, pThis->pszDir, pThis->lenDir, @@ -1448,6 +1448,46 @@ finalize_it: } +/* duplicate a stream object excluding dynamic properties. This function is + * primarily meant to provide a duplicate that later on can be used to access + * the data. This is needed, for example, for a restart of the disk queue. + * Note that ConstructFinalize() is NOT called. So our caller may change some + * properties before finalizing things. 
+ * rgerhards, 2009-05-26 + */ +rsRetVal +strmDup(strm_t *pThis, strm_t **ppNew) +{ + strm_t *pNew = NULL; + DEFiRet; + + ISOBJ_TYPE_assert(pThis, strm); + assert(ppNew != NULL); + + CHKiRet(strmConstruct(&pNew)); + pNew->sType = pThis->sType; + pNew->iCurrFNum = pThis->iCurrFNum; + CHKmalloc(pNew->pszFName = ustrdup(pThis->pszFName)); + pNew->lenFName = pThis->lenFName; + CHKmalloc(pNew->pszDir = ustrdup(pThis->pszDir)); + pNew->lenDir = pThis->lenDir; + pNew->tOperationsMode = pThis->tOperationsMode; + pNew->tOpenMode = pThis->tOpenMode; + pNew->iMaxFileSize = pThis->iMaxFileSize; + pNew->iMaxFiles = pThis->iMaxFiles; + pNew->iFileNumDigits = pThis->iFileNumDigits; + pNew->bDeleteOnClose = pThis->bDeleteOnClose; + pNew->iCurrOffs = pThis->iCurrOffs; + + *ppNew = pNew; + pNew = NULL; + +finalize_it: + if(pNew != NULL) + strmDestruct(&pNew); + + RETiRet; +} /* set a user write-counter. This counter is initialized to zero and * receives the number of bytes written. It is accurate only after a @@ -1563,6 +1603,7 @@ CODESTARTobjQueryInterface(strm) pIf->RecordEnd = strmRecordEnd; pIf->Serialize = strmSerialize; pIf->GetCurrOffset = strmGetCurrOffset; + pIf->Dup = strmDup; pIf->SetWCntr = strmSetWCntr; /* set methods */ pIf->SetbDeleteOnClose = strmSetbDeleteOnClose; diff --git a/runtime/stream.h b/runtime/stream.h index 64ffb6e1..9577d704 100644 --- a/runtime/stream.h +++ b/runtime/stream.h @@ -169,6 +169,7 @@ BEGINinterface(strm) /* name must also be changed in ENDinterface macro! */ rsRetVal (*Serialize)(strm_t *pThis, strm_t *pStrm); rsRetVal (*GetCurrOffset)(strm_t *pThis, int64 *pOffs); rsRetVal (*SetWCntr)(strm_t *pThis, number_t *pWCnt); + rsRetVal (*Dup)(strm_t *pThis, strm_t **ppNew); INTERFACEpropSetMeth(strm, bDeleteOnClose, int); INTERFACEpropSetMeth(strm, iMaxFileSize, int); INTERFACEpropSetMeth(strm, iMaxFiles, int); @@ -183,7 +184,7 @@ BEGINinterface(strm) /* name must also be changed in ENDinterface macro! */ INTERFACEpropSetMeth(strm, iFlushInterval, int); INTERFACEpropSetMeth(strm, pszSizeLimitCmd, uchar*); ENDinterface(strm) -#define strmCURR_IF_VERSION 2 /* increment whenever you change the interface structure! */ +#define strmCURR_IF_VERSION 5 /* increment whenever you change the interface structure! */ /* prototypes */ diff --git a/runtime/syslogd-types.h b/runtime/syslogd-types.h index 4a26f993..161ee06f 100644 --- a/runtime/syslogd-types.h +++ b/runtime/syslogd-types.h @@ -56,7 +56,8 @@ * applications I do not yet envision. -- rgerhards, 2007-07-24 */ typedef enum _syslogFeature { - sFEATURERepeatedMsgReduction = 1 + sFEATURERepeatedMsgReduction = 1, + sFEATURENonCancelInputTermination = 2 } syslogFeature; /* we define our own facility and severities */ diff --git a/runtime/wti.c b/runtime/wti.c index abdf4add..53b695b0 100644 --- a/runtime/wti.c +++ b/runtime/wti.c @@ -39,9 +39,10 @@ #include <pthread.h> #include <errno.h> -#ifdef OS_SOLARIS -# include <sched.h> -#endif +/// TODO: check on solaris if this is any longer needed - I don't think so - rgerhards, 2009-09-20 +//#ifdef OS_SOLARIS +//# include <sched.h> +//#endif #include "rsyslog.h" #include "stringbuf.h" @@ -75,92 +76,50 @@ wtiGetDbgHdr(wti_t *pThis) } -/* get the current worker state. For simplicity and speed, we have - * NOT used our regular calling interface this time. I hope that won't - * bite in the long term... -- rgerhards, 2008-01-17 - * TODO: may be performance optimized by atomic operations +/* return the current worker processing state. 
For the sake of + * simplicity, we do not use the iRet interface. -- rgerhards, 2009-07-17 */ -qWrkCmd_t -wtiGetState(wti_t *pThis, int bLockMutex) +bool +wtiGetState(wti_t *pThis) { - DEFVARS_mutexProtection; - qWrkCmd_t tCmd; - - BEGINfunc - ISOBJ_TYPE_assert(pThis, wti); - - BEGIN_MTX_PROTECTED_OPERATIONS(&pThis->mut, bLockMutex); - tCmd = pThis->tCurrCmd; - END_MTX_PROTECTED_OPERATIONS(&pThis->mut); - - ENDfunc - return tCmd; + return ATOMIC_FETCH_32BIT(pThis->bIsRunning); } -/* send a command to a specific thread - * bActiveOnly specifies if the command should be sent only when the worker is - * in an active state. -- rgerhards, 2008-01-20 +/* Set this thread to "always running" state (can not be unset) + * rgerhards, 2009-07-20 */ rsRetVal -wtiSetState(wti_t *pThis, qWrkCmd_t tCmd, int bActiveOnly, int bLockMutex) +wtiSetAlwaysRunning(wti_t *pThis) { - DEFiRet; - qWrkCmd_t tCurrCmd; - DEFVARS_mutexProtection; - ISOBJ_TYPE_assert(pThis, wti); - assert(tCmd <= eWRKTHRD_SHUTDOWN_IMMEDIATE); - - BEGIN_MTX_PROTECTED_OPERATIONS(&pThis->mut, bLockMutex); - - tCurrCmd = pThis->tCurrCmd; - /* all worker states must be followed sequentially, only termination can be set in any state */ - if( (bActiveOnly && (tCurrCmd < eWRKTHRD_RUN_CREATED)) - || (tCurrCmd > tCmd && !(tCmd == eWRKTHRD_TERMINATING || tCmd == eWRKTHRD_STOPPED))) { - DBGPRINTF("%s: command %d can not be accepted in current %d processing state - ignored\n", - wtiGetDbgHdr(pThis), tCmd, tCurrCmd); - } else { - DBGPRINTF("%s: receiving command %d\n", wtiGetDbgHdr(pThis), tCmd); - /* we could replace this with a simple if, but we leave the switch in in case we need - * to add something at a later stage. -- rgerhards, 2008-09-30 - */ - switch(tCmd) { - case eWRKTHRD_TERMINATING: - /* TODO: re-enable meaningful debug msg! (via function callback?) - dbgprintf("%s: thread terminating with %d entries left in queue, %d workers running.\n", - wtiGetDbgHdr(pThis->pQueue), pThis->pQueue->iQueueSize, - pThis->pQueue->iCurNumWrkThrd); - */ - pthread_cond_signal(&pThis->condExitDone); - dbgprintf("%s: worker terminating\n", wtiGetDbgHdr(pThis)); - break; - /* these cases just to satisfy the compiler, we do (yet) not act an them: */ - case eWRKTHRD_RUNNING: - case eWRKTHRD_STOPPED: - case eWRKTHRD_RUN_CREATED: - case eWRKTHRD_RUN_INIT: - case eWRKTHRD_SHUTDOWN: - case eWRKTHRD_SHUTDOWN_IMMEDIATE: - /* DO NOTHING */ - break; - } - /* apply the new state */ - unsigned val = ATOMIC_CAS_VAL(pThis->tCurrCmd, tCurrCmd, tCmd); - if(val != tCurrCmd) { - DBGPRINTF("wtiSetState PROBLEM, tCurrCmd %d overwritten with %d, wanted to set %d\n", tCurrCmd, val, tCmd); - } - - } + pThis->bAlwaysRunning = TRUE; + return RS_RET_OK; +} - END_MTX_PROTECTED_OPERATIONS(&pThis->mut); - RETiRet; +/* Set status (thread is running or not), actually an property of + * use for wtp, but we need to have it per thread instance (thus it + * is inside wti). -- rgerhards, 2009-07-17 + */ +rsRetVal +wtiSetState(wti_t *pThis, bool bNewVal) +{ + ISOBJ_TYPE_assert(pThis, wti); + if(bNewVal) + ATOMIC_STORE_1_TO_INT(pThis->bIsRunning); + else + ATOMIC_STORE_0_TO_INT(pThis->bIsRunning); + return RS_RET_OK; } -/* Cancel the thread. If the thread is already cancelled or terminated, - * we do not again cancel it. But it is save and legal to call wtiCancelThrd() in - * such situations. +/* Cancel the thread. If the thread is not running. But it is save and legal to + * call wtiCancelThrd() in such situations. This function only returns when the + * thread has terminated. 
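The new wtiSetState()/wtiGetState() pair reduces worker state to a single atomically accessed int, and wtiCancelThrd() then only needs to poll that flag after issuing the cancel. A standalone sketch that uses the GCC __sync builtins directly (the runtime hides comparable operations behind its ATOMIC_* macros, which is an assumption of this example):

    #include <pthread.h>
    #include <time.h>

    typedef struct worker_s {
        pthread_t thrdID;
        int bIsRunning;   /* plain int so the atomic builtins can operate on it */
    } worker_t;

    /* called by the worker on entry and from its cleanup handler, respectively */
    static void set_running(worker_t *w) { __sync_fetch_and_or(&w->bIsRunning, 1); }
    static void clr_running(worker_t *w) { __sync_fetch_and_and(&w->bIsRunning, 0); }
    static int  is_running(worker_t *w)  { return __sync_fetch_and_add(&w->bIsRunning, 0); }

    /* cancel a worker and poll in 10ms steps until it is really gone; the busy
     * wait is acceptable because cancellation happens at most once, at shutdown */
    static void cancel_and_wait(worker_t *w)
    {
        struct timespec ts = { 0, 10 * 1000 * 1000 };   /* 10 ms */

        if(!is_running(w))
            return;
        pthread_cancel(w->thrdID);
        while(is_running(w))
            nanosleep(&ts, NULL);
    }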
Else we may get race conditions all over the code... + * Note that when waiting for the thread to terminate, we do a busy wait, checking + * progress every 10ms. It is very unlikely that we will ever cancel a thread + * and, if so, it will only happen at the end of the rsyslog run. So doing this + * kind of not optimal wait is considered preferable over using condition variables. * rgerhards, 2008-02-26 */ rsRetVal @@ -170,19 +129,16 @@ wtiCancelThrd(wti_t *pThis) ISOBJ_TYPE_assert(pThis, wti); - d_pthread_mutex_lock(&pThis->mut); - - wtiProcessThrdChanges(pThis, MUTEX_ALREADY_LOCKED); /* process state change, so that we have current state vars */ - - if(pThis->tCurrCmd >= eWRKTHRD_TERMINATING) { - dbgoprint((obj_t*) pThis, "canceling worker thread, curr stat %d\n", pThis->tCurrCmd); + if(wtiGetState(pThis)) { + dbgoprint((obj_t*) pThis, "canceling worker thread\n"); pthread_cancel(pThis->thrdID); - wtiSetState(pThis, eWRKTHRD_TERMINATING, 0, MUTEX_ALREADY_LOCKED); - ATOMIC_STORE_1_TO_INT(pThis->pWtp->bThrdStateChanged); /* indicate change, so harverster will be called */ + /* now wait until the thread terminates... */ + while(wtiGetState(pThis)) { +//fprintf(stderr, "sleep loop for getState\n"); + srSleep(0, 10000); + } } - d_pthread_mutex_unlock(&pThis->mut); - RETiRet; } @@ -190,26 +146,8 @@ wtiCancelThrd(wti_t *pThis) /* Destructor */ BEGINobjDestruct(wti) /* be sure to specify the object type also in END and CODESTART macros! */ CODESTARTobjDestruct(wti) - /* if we reach this point, we must make sure the associated worker has terminated. It is - * the callers duty to make sure the worker already knows it shall terminate. - * TODO: is it *really* the caller's duty? ...mmmhhhh.... smells bad... rgerhards, 2008-01-25 - */ - wtiProcessThrdChanges(pThis, LOCK_MUTEX); /* process state change one last time */ - - d_pthread_mutex_lock(&pThis->mut); - if(wtiGetState(pThis, MUTEX_ALREADY_LOCKED) != eWRKTHRD_STOPPED) { - dbgprintf("%s: WARNING: worker %p shall be destructed but is still running (might be OK) - joining it\n", - wtiGetDbgHdr(pThis), pThis); - /* let's hope the caller actually instructed it to shutdown... */ - pthread_cond_wait(&pThis->condExitDone, &pThis->mut); - wtiJoinThrd(pThis); - } - d_pthread_mutex_unlock(&pThis->mut); - /* actual destruction */ - pthread_cond_destroy(&pThis->condExitDone); - pthread_mutex_destroy(&pThis->mut); - + free(pThis->batch.pElem); free(pThis->pszDbgHdr); ENDobjDestruct(wti) @@ -217,8 +155,6 @@ ENDobjDestruct(wti) /* Standard-Constructor for the wti object */ BEGINobjConstruct(wti) /* be sure to specify the object type also in END macro! */ - pthread_cond_init(&pThis->condExitDone, NULL); - pthread_mutex_init(&pThis->mut, NULL); ENDobjConstruct(wti) @@ -229,81 +165,28 @@ rsRetVal wtiConstructFinalize(wti_t *pThis) { DEFiRet; + int iDeqBatchSize; ISOBJ_TYPE_assert(pThis, wti); dbgprintf("%s: finalizing construction of worker instance data\n", wtiGetDbgHdr(pThis)); - /* initialize our thread instance descriptor */ - pThis->pUsrp = NULL; - pThis->tCurrCmd = eWRKTHRD_STOPPED; - - RETiRet; -} - - -/* join a specific worker thread - * we do not lock the mutex, because join will sync anyways... 
- */ -rsRetVal -wtiJoinThrd(wti_t *pThis) -{ - DEFiRet; + /* initialize our thread instance descriptor (no concurrency here) */ + pThis->bIsRunning = FALSE; - ISOBJ_TYPE_assert(pThis, wti); - dbgprintf("waiting for worker %s termination, current state %d\n", wtiGetDbgHdr(pThis), pThis->tCurrCmd); - if (pThis->thrdID == 0) { - dbgprintf("worker %s was already stopped\n", wtiGetDbgHdr(pThis)); - } else { - pthread_join(pThis->thrdID, NULL); - wtiSetState(pThis, eWRKTHRD_STOPPED, 0, MUTEX_ALREADY_LOCKED); /* back to virgin... */ - pThis->thrdID = 0; /* invalidate the thread ID so that we do not accidently find reused ones */ - dbgprintf("worker %s has stopped\n", wtiGetDbgHdr(pThis)); - } - - RETiRet; -} - -/* check if we had a worker thread changes and, if so, act - * on it. At a minimum, terminated threads are harvested (joined). - */ -rsRetVal -wtiProcessThrdChanges(wti_t *pThis, int bLockMutex) -{ - DEFiRet; - DEFVARS_mutexProtection; - - ISOBJ_TYPE_assert(pThis, wti); - - BEGIN_MTX_PROTECTED_OPERATIONS(&pThis->mut, bLockMutex); - switch(pThis->tCurrCmd) { - case eWRKTHRD_TERMINATING: - /* we need to at least temporarily release the mutex, because otherwise - * we may deadlock with the thread we intend to join (it aquires the mutex - * during termination processing). -- rgerhards, 2008-02-26 - */ - END_MTX_PROTECTED_OPERATIONS(&pThis->mut); - iRet = wtiJoinThrd(pThis); - BEGIN_MTX_PROTECTED_OPERATIONS(&pThis->mut, bLockMutex); - break; - /* these cases just to satisfy the compiler, we do not act an them: */ - case eWRKTHRD_STOPPED: - case eWRKTHRD_RUN_CREATED: - case eWRKTHRD_RUN_INIT: - case eWRKTHRD_RUNNING: - case eWRKTHRD_SHUTDOWN: - case eWRKTHRD_SHUTDOWN_IMMEDIATE: - /* DO NOTHING */ - break; - } - END_MTX_PROTECTED_OPERATIONS(&pThis->mut); + /* we now alloc the array for user pointers. We obtain the max from the queue itself. */ + CHKiRet(pThis->pWtp->pfGetDeqBatchSize(pThis->pWtp->pUsr, &iDeqBatchSize)); + CHKmalloc(pThis->batch.pElem = calloc((size_t)iDeqBatchSize, sizeof(batch_obj_t))); +finalize_it: RETiRet; } /* cancellation cleanup handler for queueWorker () * Updates admin structure and frees ressources. + * Keep in mind that cancellation is disabled if we run into + * the cancel cleanup handler (and have been cancelled). * rgerhards, 2008-01-16 */ static void @@ -311,7 +194,6 @@ wtiWorkerCancelCleanup(void *arg) { wti_t *pThis = (wti_t*) arg; wtp_t *pWtp; - int iCancelStateSave; BEGINfunc ISOBJ_TYPE_assert(pThis, wti); @@ -320,17 +202,39 @@ wtiWorkerCancelCleanup(void *arg) DBGPRINTF("%s: cancelation cleanup handler called.\n", wtiGetDbgHdr(pThis)); - /* call user supplied handler (that one e.g. requeues the element) */ - pWtp->pfOnWorkerCancel(pThis->pWtp->pUsr, pThis->pUsrp); + /* call user supplied handler */ + pWtp->pfOnWorkerCancel(pThis->pWtp->pUsr, pThis->batch.pElem[0].pUsrp); - pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &iCancelStateSave); - d_pthread_mutex_lock(&pWtp->mut); - wtiSetState(pThis, eWRKTHRD_TERMINATING, 0, MUTEX_ALREADY_LOCKED); - /* TODO: sync access? I currently think it is NOT needed -- rgerhards, 2008-01-28 */ - ATOMIC_STORE_1_TO_INT(pWtp->bThrdStateChanged); /* indicate change, so harverster will be called */ + ENDfunc +} - d_pthread_mutex_unlock(&pWtp->mut); - pthread_setcancelstate(iCancelStateSave, NULL); + +/* wait for queue to become non-empty or timeout + * helper to wtiWorker. Note the the predicate is + * re-tested by the caller, so it is OK to NOT do it here. 
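wtiConstructFinalize() above sizes the worker's reusable batch array from the queue's dequeue batch size, obtained through the new pfGetDeqBatchSize callback. A minimal sketch of that allocation; the batch types are reduced to the bare minimum for the example:

    #include <stdlib.h>

    typedef struct batch_obj_s { void *pUsrp; } batch_obj_t;      /* reduced for the example */
    typedef struct batch_s     { int nElem; batch_obj_t *pElem; } batch_t;

    /* callback type: the queue reports how many elements one batch may hold */
    typedef int (*getDeqBatchSize_t)(void *pUsr, int *pVal);

    /* size the worker's reusable batch array once, at construction time */
    static int init_worker_batch(batch_t *b, getDeqBatchSize_t pfGet, void *pUsr)
    {
        int sz;

        if(pfGet(pUsr, &sz) != 0)
            return -1;
        if((b->pElem = calloc((size_t) sz, sizeof(batch_obj_t))) == NULL)
            return -1;
        b->nElem = 0;
        return 0;
    }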
+ * rgerhards, 2009-05-20 + */ +static inline void +doIdleProcessing(wti_t *pThis, wtp_t *pWtp, int *pbInactivityTOOccured) +{ + struct timespec t; + + BEGINfunc + DBGPRINTF("%s: worker IDLE, waiting for work.\n", wtiGetDbgHdr(pThis)); + + pWtp->pfOnIdle(pWtp->pUsr, MUTEX_ALREADY_LOCKED); + + if(pThis->bAlwaysRunning) { + /* never shut down any started worker */ +dbgprintf("YYY/ZZZ: wti Idle wait cond busy, mutex %p\n", pWtp->pmutUsr); + d_pthread_cond_wait(pWtp->pcondBusy, pWtp->pmutUsr); + } else { + timeoutComp(&t, pWtp->toWrkShutdown);/* get absolute timeout */ + if(d_pthread_cond_timedwait(pWtp->pcondBusy, pWtp->pmutUsr, &t) != 0) { + DBGPRINTF("%s: inactivity timeout, worker terminating...\n", wtiGetDbgHdr(pThis)); + *pbInactivityTOOccured = 1; /* indicate we had a timeout */ + } + } ENDfunc } @@ -341,82 +245,72 @@ wtiWorkerCancelCleanup(void *arg) rsRetVal wtiWorker(wti_t *pThis) { - DEFiRet; - DEFVARS_mutexProtection; - struct timespec t; wtp_t *pWtp; /* our worker thread pool */ int bInactivityTOOccured = 0; + rsRetVal localRet; + rsRetVal terminateRet; + int iCancelStateSave; + DEFiRet; ISOBJ_TYPE_assert(pThis, wti); pWtp = pThis->pWtp; /* shortcut */ ISOBJ_TYPE_assert(pWtp, wtp); dbgSetThrdName(pThis->pszDbgHdr); - pThis->pUsrp = NULL; pthread_cleanup_push(wtiWorkerCancelCleanup, pThis); - BEGIN_MTX_PROTECTED_OPERATIONS(pWtp->pmutUsr, LOCK_MUTEX); pWtp->pfOnWorkerStartup(pWtp->pUsr); - END_MTX_PROTECTED_OPERATIONS(pWtp->pmutUsr); /* now we have our identity, on to real processing */ while(1) { /* loop will be broken below - need to do mutex locks */ - /* process any pending thread requests */ - wtpProcessThrdChanges(pWtp); - - /* if we have a rate-limiter set for this worker pool, let's call it. Please - * keep in mind that the rate-limiter may hold us for an extended period - * of time. -- rgerhards, 2008-04-02 - */ - if(pWtp->pfRateLimiter != NULL) { + if(pWtp->pfRateLimiter != NULL) { /* call rate-limiter, if defined */ pWtp->pfRateLimiter(pWtp->pUsr); } - wtpSetInactivityGuard(pThis->pWtp, 0, LOCK_MUTEX); /* must be set before usr mutex is locked! 
*/ - BEGIN_MTX_PROTECTED_OPERATIONS(pWtp->pmutUsr, LOCK_MUTEX); - - if( (bInactivityTOOccured && pWtp->pfIsIdle(pWtp->pUsr, MUTEX_ALREADY_LOCKED)) - || wtpChkStopWrkr(pWtp, LOCK_MUTEX, MUTEX_ALREADY_LOCKED)) { - END_MTX_PROTECTED_OPERATIONS(pWtp->pmutUsr); - break; /* end worker thread run */ +dbgprintf("YYY/ZZZ: pre lock mutex\n"); + d_pthread_mutex_lock(pWtp->pmutUsr); + +dbgprintf("YYY/ZZZ: wti locks mutex %p\n", pWtp->pmutUsr); + /* first check if we are in shutdown process (but evaluate a bit later) */ + terminateRet = wtpChkStopWrkr(pWtp, MUTEX_ALREADY_LOCKED); + if(terminateRet == RS_RET_TERMINATE_NOW) { + /* we now need to free the old batch */ + localRet = pWtp->pfObjProcessed(pWtp->pUsr, pThis); + dbgoprint((obj_t*) pThis, "terminating worker because of TERMINATE_NOW mode, del iRet %d\n", + localRet); + d_pthread_mutex_unlock(pWtp->pmutUsr); + break; } - bInactivityTOOccured = 0; /* reset for next run */ - /* if we reach this point, we are still protected by the mutex */ - - if(pWtp->pfIsIdle(pWtp->pUsr, MUTEX_ALREADY_LOCKED)) { - DBGPRINTF("%s: worker IDLE, waiting for work.\n", wtiGetDbgHdr(pThis)); - pWtp->pfOnIdle(pWtp->pUsr, MUTEX_ALREADY_LOCKED); - - if(pWtp->toWrkShutdown == -1) { - /* never shut down any started worker */ - d_pthread_cond_wait(pWtp->pcondBusy, pWtp->pmutUsr); - } else { - timeoutComp(&t, pWtp->toWrkShutdown);/* get absolute timeout */ - if(d_pthread_cond_timedwait(pWtp->pcondBusy, pWtp->pmutUsr, &t) != 0) { - DBGPRINTF("%s: inactivity timeout, worker terminating...\n", wtiGetDbgHdr(pThis)); - bInactivityTOOccured = 1; /* indicate we had a timeout */ - } + /* try to execute and process whatever we have */ + /* Note that this function releases and re-aquires the mutex. The returned + * information on idle state must be processed before releasing the mutex again. 
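The worker loop above distinguishes an immediate shutdown (drop the current batch and leave) from a drain-then-terminate shutdown (leave only once the queue reports idle). A skeleton of that control flow with stubbed-out callbacks; the stubs are chosen so the example terminates and do not reproduce the real queue logic:

    #include <pthread.h>

    enum { W_OK = 0, W_IDLE, W_TERMINATE_NOW, W_TERMINATE_WHEN_IDLE };

    typedef struct pool_s {
        pthread_mutex_t mut;
        pthread_cond_t  busy;
    } pool_t;

    /* stub: pretend a drain-then-terminate shutdown was requested so the example ends */
    static int  chk_stop(pool_t *p)   { (void) p; return W_TERMINATE_WHEN_IDLE; }
    /* stub: report an empty queue; the real callback dequeues a batch, runs the
     * consumer and temporarily drops the mutex while doing so */
    static int  do_work(pool_t *p)    { (void) p; return W_IDLE; }
    /* stub: free whatever batch the worker still holds */
    static void drop_batch(pool_t *p) { (void) p; }
    static void idle_wait(pool_t *p)  { pthread_cond_wait(&p->busy, &p->mut); }

    static void worker_loop(pool_t *p)
    {
        int term, ret;

        while(1) {
            pthread_mutex_lock(&p->mut);

            term = chk_stop(p);              /* fetch shutdown state, evaluate below */
            if(term == W_TERMINATE_NOW) {
                drop_batch(p);
                pthread_mutex_unlock(&p->mut);
                break;
            }

            ret = do_work(p);
            if(ret == W_IDLE) {
                if(term == W_TERMINATE_WHEN_IDLE) {
                    pthread_mutex_unlock(&p->mut);
                    break;                   /* queue drained: terminate */
                }
                idle_wait(p);                /* wait for new work (timeout handling omitted) */
            }

            pthread_mutex_unlock(&p->mut);
        }
    }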
+ */ + localRet = pWtp->pfDoWork(pWtp->pUsr, pThis); + +dbgprintf("YYY/ZZZ: wti loop locked mutex %p again\n", pWtp->pmutUsr); + if(localRet == RS_RET_IDLE) { + if(terminateRet == RS_RET_TERMINATE_WHEN_IDLE || bInactivityTOOccured) { + d_pthread_mutex_unlock(pWtp->pmutUsr); + break; /* end of loop */ } - END_MTX_PROTECTED_OPERATIONS(pWtp->pmutUsr); + doIdleProcessing(pThis, pWtp, &bInactivityTOOccured); + d_pthread_mutex_unlock(pWtp->pmutUsr); continue; /* request next iteration */ } - /* if we reach this point, we have a non-empty queue (and are still protected by mutex) */ - pWtp->pfDoWork(pWtp->pUsr, pThis, iCancelStateSave); + d_pthread_mutex_unlock(pWtp->pmutUsr); + + bInactivityTOOccured = 0; /* reset for next run */ } /* indicate termination */ + d_pthread_mutex_lock(pWtp->pmutUsr); pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &iCancelStateSave); - d_pthread_mutex_lock(&pThis->mut); pthread_cleanup_pop(0); /* remove cleanup handler */ - pWtp->pfOnWorkerShutdown(pWtp->pUsr); - - wtiSetState(pThis, eWRKTHRD_TERMINATING, 0, MUTEX_ALREADY_LOCKED); - ATOMIC_STORE_1_TO_INT(pWtp->bThrdStateChanged); /* indicate change, so harverster will be called */ - d_pthread_mutex_unlock(&pThis->mut); pthread_setcancelstate(iCancelStateSave, NULL); + d_pthread_mutex_unlock(pWtp->pmutUsr); RETiRet; } @@ -444,7 +338,6 @@ wtiSetDbgHdr(wti_t *pThis, uchar *pszMsg, size_t lenMsg) if(pThis->pszDbgHdr != NULL) { free(pThis->pszDbgHdr); - pThis->pszDbgHdr = NULL; } if((pThis->pszDbgHdr = malloc(sizeof(uchar) * lenMsg + 1)) == NULL) @@ -478,6 +371,5 @@ BEGINObjClassInit(wti, 1, OBJ_IS_CORE_MODULE) /* one is the object version (most CHKiRet(objUse(glbl, CORE_COMPONENT)); ENDObjClassInit(wti) -/* - * vi:set ai: +/* vi:set ai: */ diff --git a/runtime/wti.h b/runtime/wti.h index 72653b15..f466a053 100644 --- a/runtime/wti.h +++ b/runtime/wti.h @@ -1,6 +1,6 @@ /* Definition of the worker thread instance (wti) class. * - * Copyright 2008 Rainer Gerhards and Adiscon GmbH. + * Copyright 2008, 2009 by Rainer Gerhards and Adiscon GmbH. * * This file is part of the rsyslog runtime library. * @@ -27,21 +27,19 @@ #include <pthread.h> #include "wtp.h" #include "obj.h" +#include "batch.h" + /* the worker thread instance class */ -typedef struct wti_s { +struct wti_s { BEGINobjInstance; - pthread_t thrdID; /* thread ID */ - qWrkCmd_t tCurrCmd; /* current command to be carried out by worker */ - obj_t *pUsrp; /* pointer to an object meaningful for current user pointer (e.g. queue pUsr data elemt) */ + pthread_t thrdID; /* thread ID */ + int bIsRunning; /* is this thread currently running? (must be int for atomic op!) */ + bool bAlwaysRunning; /* should this thread always run? */ wtp_t *pWtp; /* my worker thread pool (important if only the work thread instance is passed! */ - pthread_cond_t condExitDone; /* signaled when the thread exit is done (once per thread existance) */ - pthread_mutex_t mut; - bool bShutdownRqtd; /* shutdown for this thread requested? 0 - no , 1 - yes */ + batch_t batch; /* pointer to an object array meaningful for current user pointer (e.g. 
queue pUsr data elemt) */ uchar *pszDbgHdr; /* header string for debug messages */ -} wti_t; - -/* some symbolic constants for easier reference */ +}; /* prototypes */ @@ -49,12 +47,11 @@ rsRetVal wtiConstruct(wti_t **ppThis); rsRetVal wtiConstructFinalize(wti_t *pThis); rsRetVal wtiDestruct(wti_t **ppThis); rsRetVal wtiWorker(wti_t *pThis); -rsRetVal wtiProcessThrdChanges(wti_t *pThis, int bLockMutex); rsRetVal wtiSetDbgHdr(wti_t *pThis, uchar *pszMsg, size_t lenMsg); -rsRetVal wtiSetState(wti_t *pThis, qWrkCmd_t tCmd, int bActiveOnly, int bLockMutex); -rsRetVal wtiJoinThrd(wti_t *pThis); rsRetVal wtiCancelThrd(wti_t *pThis); -qWrkCmd_t wtiGetState(wti_t *pThis, int bLockMutex); +rsRetVal wtiSetAlwaysRunning(wti_t *pThis); +rsRetVal wtiSetState(wti_t *pThis, bool bNew); +bool wtiGetState(wti_t *pThis); PROTOTYPEObjClassInit(wti); PROTOTYPEpropSetMeth(wti, pszDbgHdr, uchar*); PROTOTYPEpropSetMeth(wti, pWtp, wtp_t*); diff --git a/runtime/wtp.c b/runtime/wtp.c index 0c66dd11..47b99fe8 100644 --- a/runtime/wtp.c +++ b/runtime/wtp.c @@ -8,7 +8,7 @@ * (and in the web doc set on http://www.rsyslog.com/doc). Be sure to read it * if you are getting aquainted to the object. * - * Copyright 2008 Rainer Gerhards and Adiscon GmbH. + * Copyright 2008,2009 Rainer Gerhards and Adiscon GmbH. * * This file is part of the rsyslog runtime library. * @@ -44,9 +44,10 @@ # include <sys/prctl.h> #endif -#ifdef OS_SOLARIS -# include <sched.h> -#endif +/// TODO: check on solaris if this is any longer needed - I don't think so - rgerhards, 2009-09-20 +//#ifdef OS_SOLARIS +//# include <sched.h> +//#endif #include "rsyslog.h" #include "stringbuf.h" @@ -82,17 +83,20 @@ wtpGetDbgHdr(wtp_t *pThis) /* Not implemented dummy function for constructor */ -static rsRetVal NotImplementedDummy() { return RS_RET_OK; } +static rsRetVal NotImplementedDummy() { return RS_RET_NOT_IMPLEMENTED; } /* Standard-Constructor for the wtp object */ BEGINobjConstruct(wtp) /* be sure to specify the object type also in END macro! */ - pthread_mutex_init(&pThis->mut, NULL); - pthread_mutex_init(&pThis->mutThrdShutdwn, NULL); + pthread_mutex_init(&pThis->mutWtp, NULL); pthread_cond_init(&pThis->condThrdTrm, NULL); + pthread_attr_init(&pThis->attrThrd); + pthread_attr_setdetachstate(&pThis->attrThrd, PTHREAD_CREATE_DETACHED); /* set all function pointers to "not implemented" dummy so that we can safely call them */ pThis->pfChkStopWrkr = NotImplementedDummy; + pThis->pfGetDeqBatchSize = NotImplementedDummy; pThis->pfIsIdle = NotImplementedDummy; pThis->pfDoWork = NotImplementedDummy; + pThis->pfObjProcessed = NotImplementedDummy; pThis->pfOnIdle = NotImplementedDummy; pThis->pfOnWorkerCancel = NotImplementedDummy; pThis->pfOnWorkerStartup = NotImplementedDummy; @@ -114,13 +118,13 @@ wtpConstructFinalize(wtp_t *pThis) ISOBJ_TYPE_assert(pThis, wtp); - dbgprintf("%s: finalizing construction of worker thread pool\n", wtpGetDbgHdr(pThis)); + DBGPRINTF("%s: finalizing construction of worker thread pool\n", wtpGetDbgHdr(pThis)); /* alloc and construct workers - this can only be done in finalizer as we previously do * not know the max number of workers */ if((pThis->pWrkr = malloc(sizeof(wti_t*) * pThis->iNumWorkerThreads)) == NULL) ABORT_FINALIZE(RS_RET_OUT_OF_MEMORY); - + for(i = 0 ; i < pThis->iNumWorkerThreads ; ++i) { CHKiRet(wtiConstruct(&pThis->pWrkr[i])); pWti = pThis->pWrkr[i]; @@ -140,8 +144,6 @@ finalize_it: BEGINobjDestruct(wtp) /* be sure to specify the object type also in END and CODESTART macros! 
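The pool constructor above prepares a thread attribute object with PTHREAD_CREATE_DETACHED, so workers are never joined and their termination is tracked via a counter and condition variable instead. A minimal example of starting such a detached worker:

    #include <pthread.h>

    static void *worker(void *arg) { (void) arg; return NULL; }   /* placeholder body */

    /* start a worker as a detached thread: it is never pthread_join()ed and its
     * resources are released automatically when it exits */
    static int start_detached_worker(pthread_t *tid)
    {
        pthread_attr_t attr;
        int r;

        pthread_attr_init(&attr);
        pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
        r = pthread_create(tid, &attr, worker, NULL);
        pthread_attr_destroy(&attr);
        return r;
    }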
*/ int i; CODESTARTobjDestruct(wtp) - wtpProcessThrdChanges(pThis); /* process thread changes one last time */ - /* destruct workers */ for(i = 0 ; i < pThis->iNumWorkerThreads ; ++i) wtiDestruct(&pThis->pWrkr[i]); @@ -151,27 +153,13 @@ CODESTARTobjDestruct(wtp) /* actual destruction */ pthread_cond_destroy(&pThis->condThrdTrm); - pthread_mutex_destroy(&pThis->mut); - pthread_mutex_destroy(&pThis->mutThrdShutdwn); + pthread_mutex_destroy(&pThis->mutWtp); + pthread_attr_destroy(&pThis->attrThrd); free(pThis->pszDbgHdr); ENDobjDestruct(wtp) -/* wake up at least one worker thread. - * rgerhards, 2008-01-20 - */ -rsRetVal -wtpWakeupWrkr(wtp_t *pThis) -{ - DEFiRet; - - /* TODO; mutex? I think not needed, as we do not need predictable exec order -- rgerhards, 2008-01-28 */ - ISOBJ_TYPE_assert(pThis, wtp); - pthread_cond_signal(pThis->pcondBusy); - RETiRet; -} - /* wake up all worker threads. * rgerhards, 2008-01-16 */ @@ -186,99 +174,61 @@ wtpWakeupAllWrkr(wtp_t *pThis) } -/* check if we had any worker thread changes and, if so, act - * on them. At a minimum, terminated threads are harvested (joined). - * This function MUST NEVER block on the queue mutex! - */ -rsRetVal -wtpProcessThrdChanges(wtp_t *pThis) -{ - DEFiRet; - int i; - - ISOBJ_TYPE_assert(pThis, wtp); - - if(pThis->bThrdStateChanged == 0) - FINALIZE; - - if(d_pthread_mutex_trylock(&(pThis->mutThrdShutdwn)) != 0) { - /* another thread is already in the loop */ - FINALIZE; - } - - /* Note: there is a left-over potential race condition below: - * pThis->bThrdStateChanged may be re-set by another thread while - * we work on it and thus the loop may terminate too early. However, - * there are no really bad effects from that so I perfer - for this - * version - to live with the problem as is. Not a good idea to - * introduce that large change into the stable branch without very - * good reason. -- rgerhards, 2009-04-02 - */ - do { - /* reset the change marker */ - ATOMIC_STORE_0_TO_INT(pThis->bThrdStateChanged); - /* go through all threads */ - for(i = 0 ; i < pThis->iNumWorkerThreads ; ++i) { - wtiProcessThrdChanges(pThis->pWrkr[i], LOCK_MUTEX); - } - /* restart if another change occured while we were processing the changes */ - } while(pThis->bThrdStateChanged != 0); - - d_pthread_mutex_unlock(&(pThis->mutThrdShutdwn)); - -finalize_it: - RETiRet; -} - - -/* Sent a specific state for the worker thread pool. - * rgerhards, 2008-01-21 +/* Sent a specific state for the worker thread pool. -- rgerhards, 2008-01-21 + * We do not need to do atomic instructions as set operations are only + * called when terminating the pool, and then in strict sequence. So we + * can never overwrite each other. On the other hand, it also doesn't + * matter if the read operation obtains an older value, as we then simply + * do one more iteration, what is perfectly legal (during shutdown + * they are awoken in any case). -- rgerhards, 2009-07-20 */ rsRetVal wtpSetState(wtp_t *pThis, wtpState_t iNewState) { - DEFiRet; - ISOBJ_TYPE_assert(pThis, wtp); pThis->wtpState = iNewState; - /* TODO: must wakeup workers? seen to be not needed -- rgerhards, 2008-01-28 */ - - RETiRet; + return RS_RET_OK; } /* check if the worker shall shutdown (1 = yes, 0 = no) - * TODO: check if we can use atomic operations to enhance performance * Note: there may be two mutexes locked, the bLockUsrMutex is the one in our "user" * (e.g. 
the queue clas) * rgerhards, 2008-01-21 */ rsRetVal -wtpChkStopWrkr(wtp_t *pThis, int bLockMutex, int bLockUsrMutex) +wtpChkStopWrkr(wtp_t *pThis, int bLockUsrMutex) { DEFiRet; - DEFVARS_mutexProtection; + wtpState_t wtpState; ISOBJ_TYPE_assert(pThis, wtp); + /* we need a consistent value, but it doesn't really matter if it is changed + * right after the fetch - then we simply do one more iteration in the worker + */ + wtpState = ATOMIC_FETCH_32BIT(pThis->wtpState); - BEGIN_MTX_PROTECTED_OPERATIONS(&pThis->mut, bLockMutex); - if( (pThis->wtpState == wtpState_SHUTDOWN_IMMEDIATE) - || ((pThis->wtpState == wtpState_SHUTDOWN) && pThis->pfIsIdle(pThis->pUsr, bLockUsrMutex))) - iRet = RS_RET_TERMINATE_NOW; - END_MTX_PROTECTED_OPERATIONS(&pThis->mut); + if(wtpState == wtpState_SHUTDOWN_IMMEDIATE) { + ABORT_FINALIZE(RS_RET_TERMINATE_NOW); + } else if(wtpState == wtpState_SHUTDOWN) { + ABORT_FINALIZE(RS_RET_TERMINATE_WHEN_IDLE); + } /* try customer handler if one was set and we do not yet have a definite result */ - if(iRet == RS_RET_OK && pThis->pfChkStopWrkr != NULL) { + if(pThis->pfChkStopWrkr != NULL) { iRet = pThis->pfChkStopWrkr(pThis->pUsr, bLockUsrMutex); } +finalize_it: RETiRet; } #pragma GCC diagnostic ignored "-Wempty-body" /* Send a shutdown command to all workers and see if they terminate. - * A timeout may be specified. + * A timeout may be specified. This function may also be called with + * the current number of workers being 0, in which case it does not + * shut down any worker. * rgerhards, 2008-01-14 */ rsRetVal @@ -286,30 +236,22 @@ wtpShutdownAll(wtp_t *pThis, wtpState_t tShutdownCmd, struct timespec *ptTimeout { DEFiRet; int bTimedOut; - int iCancelStateSave; ISOBJ_TYPE_assert(pThis, wtp); wtpSetState(pThis, tShutdownCmd); wtpWakeupAllWrkr(pThis); - /* see if we need to harvest (join) any terminated threads (even in timeout case, - * some may have terminated... - */ - wtpProcessThrdChanges(pThis); - - /* and wait for their termination */ - pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &iCancelStateSave); - d_pthread_mutex_lock(&pThis->mut); - pthread_cleanup_push(mutexCancelCleanup, &pThis->mut); - pthread_setcancelstate(iCancelStateSave, NULL); + /* wait for worker thread termination */ + d_pthread_mutex_lock(&pThis->mutWtp); + pthread_cleanup_push(mutexCancelCleanup, &pThis->mutWtp); bTimedOut = 0; while(pThis->iCurNumWrkThrd > 0 && !bTimedOut) { - dbgprintf("%s: waiting %ldms on worker thread termination, %d still running\n", - wtpGetDbgHdr(pThis), timeoutVal(ptTimeout), pThis->iCurNumWrkThrd); + DBGPRINTF("%s: waiting %ldms on worker thread termination, %d still running\n", + wtpGetDbgHdr(pThis), timeoutVal(ptTimeout), ATOMIC_FETCH_32BIT(pThis->iCurNumWrkThrd)); - if(d_pthread_cond_timedwait(&pThis->condThrdTrm, &pThis->mut, ptTimeout) != 0) { - dbgprintf("%s: timeout waiting on worker thread termination\n", wtpGetDbgHdr(pThis)); + if(d_pthread_cond_timedwait(&pThis->condThrdTrm, &pThis->mutWtp, ptTimeout) != 0) { + DBGPRINTF("%s: timeout waiting on worker thread termination\n", wtpGetDbgHdr(pThis)); bTimedOut = 1; /* we exit the loop on timeout */ } } @@ -318,40 +260,11 @@ wtpShutdownAll(wtp_t *pThis, wtpState_t tShutdownCmd, struct timespec *ptTimeout if(bTimedOut) iRet = RS_RET_TIMED_OUT; - /* see if we need to harvest (join) any terminated threads (even in timeout case, - * some may have terminated... 
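wtpShutdownAll() above wakes all workers and then waits, bounded by the caller's timeout, until the worker count drops to zero, using the condThrdTrm condition variable that terminating workers broadcast. A reduced sketch of that wait; the atomic read assumes the GCC __sync builtins:

    #include <pthread.h>
    #include <time.h>

    typedef struct wtpool_s {
        pthread_mutex_t mutWtp;
        pthread_cond_t  condThrdTrm;  /* broadcast by every terminating worker */
        int iCurNumWrkThrd;           /* decremented atomically by the workers */
    } wtpool_t;

    /* wait, bounded by an absolute deadline, until all workers have terminated;
     * returns 0 on success, -1 on timeout. The timedwait also bounds the window
     * in which a termination broadcast could be missed. */
    static int wait_workers_gone(wtpool_t *p, struct timespec *deadline)
    {
        int timedout = 0;

        pthread_mutex_lock(&p->mutWtp);
        while(__sync_fetch_and_add(&p->iCurNumWrkThrd, 0) > 0 && !timedout) {
            if(pthread_cond_timedwait(&p->condThrdTrm, &p->mutWtp, deadline) != 0)
                timedout = 1;   /* ETIMEDOUT (or error): stop waiting */
        }
        pthread_mutex_unlock(&p->mutWtp);
        return timedout ? -1 : 0;
    }

    /* the terminating worker's counterpart, typically run from its cleanup handler:
     *     __sync_fetch_and_sub(&p->iCurNumWrkThrd, 1);
     *     pthread_cond_broadcast(&p->condThrdTrm);
     */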

 #pragma GCC diagnostic ignored "-Wempty-body"
 /* Send a shutdown command to all workers and see if they terminate.
- * A timeout may be specified.
+ * A timeout may be specified. This function may also be called with
+ * the current number of workers being 0, in which case it does not
+ * shut down any worker.
  * rgerhards, 2008-01-14
  */
 rsRetVal
@@ -286,30 +236,22 @@ wtpShutdownAll(wtp_t *pThis, wtpState_t tShutdownCmd, struct timespec *ptTimeout
 {
 	DEFiRet;
 	int bTimedOut;
-	int iCancelStateSave;

 	ISOBJ_TYPE_assert(pThis, wtp);

 	wtpSetState(pThis, tShutdownCmd);
 	wtpWakeupAllWrkr(pThis);

-	/* see if we need to harvest (join) any terminated threads (even in timeout case,
-	 * some may have terminated...
-	 */
-	wtpProcessThrdChanges(pThis);
-
-	/* and wait for their termination */
-	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &iCancelStateSave);
-	d_pthread_mutex_lock(&pThis->mut);
-	pthread_cleanup_push(mutexCancelCleanup, &pThis->mut);
-	pthread_setcancelstate(iCancelStateSave, NULL);
+	/* wait for worker thread termination */
+	d_pthread_mutex_lock(&pThis->mutWtp);
+	pthread_cleanup_push(mutexCancelCleanup, &pThis->mutWtp);
 	bTimedOut = 0;
 	while(pThis->iCurNumWrkThrd > 0 && !bTimedOut) {
-		dbgprintf("%s: waiting %ldms on worker thread termination, %d still running\n",
-			  wtpGetDbgHdr(pThis), timeoutVal(ptTimeout), pThis->iCurNumWrkThrd);
+		DBGPRINTF("%s: waiting %ldms on worker thread termination, %d still running\n",
+			  wtpGetDbgHdr(pThis), timeoutVal(ptTimeout), ATOMIC_FETCH_32BIT(pThis->iCurNumWrkThrd));

-		if(d_pthread_cond_timedwait(&pThis->condThrdTrm, &pThis->mut, ptTimeout) != 0) {
-			dbgprintf("%s: timeout waiting on worker thread termination\n", wtpGetDbgHdr(pThis));
+		if(d_pthread_cond_timedwait(&pThis->condThrdTrm, &pThis->mutWtp, ptTimeout) != 0) {
+			DBGPRINTF("%s: timeout waiting on worker thread termination\n", wtpGetDbgHdr(pThis));
 			bTimedOut = 1;	/* we exit the loop on timeout */
 		}
 	}
@@ -318,40 +260,11 @@ wtpShutdownAll(wtp_t *pThis, wtpState_t tShutdownCmd, struct timespec *ptTimeout
 	if(bTimedOut)
 		iRet = RS_RET_TIMED_OUT;

-	/* see if we need to harvest (join) any terminated threads (even in timeout case,
-	 * some may have terminated...
-	 */
-	wtpProcessThrdChanges(pThis);
-
 	RETiRet;
 }
 #pragma GCC diagnostic warning "-Wempty-body"
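
wtpShutdownAll() now reduces shutdown to: set the state, wake all workers, then wait on condThrdTrm until the worker count drops to zero or the absolute timeout expires. A minimal, self-contained sketch of that timed wait loop (names invented; the real code additionally pushes a mutex cancel-cleanup handler):

#include <pthread.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t mutPool = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t condThrdTrm = PTHREAD_COND_INITIALIZER;
static int numWorkers = 1;	/* terminating workers would decrement this and signal */

static int waitForWorkers(long timeoutSec)
{
	struct timespec deadline;
	int timedOut = 0;

	clock_gettime(CLOCK_REALTIME, &deadline);	/* pthread_cond_timedwait needs an absolute time */
	deadline.tv_sec += timeoutSec;

	pthread_mutex_lock(&mutPool);
	while(numWorkers > 0 && !timedOut) {
		if(pthread_cond_timedwait(&condThrdTrm, &mutPool, &deadline) != 0)
			timedOut = 1;	/* ETIMEDOUT: give up waiting, caller may cancel workers */
	}
	pthread_mutex_unlock(&mutPool);
	return timedOut;
}

int main(void)
{
	/* nobody ever signals here, so this demonstrates the timeout path */
	printf("timed out: %d\n", waitForWorkers(1));
	return 0;
}
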

-/* indicate that a thread has terminated and awake anyone waiting on it
- * rgerhards, 2008-01-23
- */
-rsRetVal wtpSignalWrkrTermination(wtp_t *pThis)
-{
-	DEFiRet;
-	/* I leave the mutex code here out as it gives us deadlocks. I think it is not really
-	 * needed and we are on the safe side. I leave this comment in if practice proves us
-	 * wrong. The whole thing should be removed after half a year or year if we see there
-	 * actually is no issue (or revisit it from a theoretical POV).
-	 * rgerhards, 2008-01-28
-	 * revisited 2008-09-30, still a bit unclear, leave in
-	 */
-	/*TODO: mutex or not mutex, that's the question ;)DEFVARS_mutexProtection;*/
-
-	ISOBJ_TYPE_assert(pThis, wtp);
-
-	/*BEGIN_MTX_PROTECTED_OPERATIONS(&pThis->mut, LOCK_MUTEX);*/
-	pthread_cond_signal(&pThis->condThrdTrm); /* activate anyone waiting on thread shutdown */
-	/*END_MTX_PROTECTED_OPERATIONS(&pThis->mut);*/
-	RETiRet;
-}
-
-
 /* Unconditionally cancel all running worker threads.
  * rgerhards, 2008-01-14
  */
@@ -363,12 +276,8 @@ wtpCancelAll(wtp_t *pThis)
 	ISOBJ_TYPE_assert(pThis, wtp);

-	/* process any pending thread requests so that we know who actually is still running */
-	wtpProcessThrdChanges(pThis);
-
 	/* go through all workers and cancel those that are active */
 	for(i = 0 ; i < pThis->iNumWorkerThreads ; ++i) {
-		dbgprintf("%s: try canceling worker thread %d\n", wtpGetDbgHdr(pThis), i);
 		wtiCancelThrd(pThis->pWrkr[i]);
 	}
@@ -376,39 +285,29 @@
 }
-
-/* Set the Inactivity Guard
- * rgerhards, 2008-01-21
- */
-rsRetVal
-wtpSetInactivityGuard(wtp_t *pThis, int bNewState, int bLockMutex)
-{
-	DEFiRet;
-	DEFVARS_mutexProtection;
-
-	BEGIN_MTX_PROTECTED_OPERATIONS(&pThis->mut, bLockMutex);
-	pThis->bInactivityGuard = bNewState;
-	END_MTX_PROTECTED_OPERATIONS(&pThis->mut);
-
-	RETiRet;
-}
-
-
-/* cancellation cleanup handler for executing worker
- * decrements the worker counter
- * rgerhards, 2008-01-20
+/* cancellation cleanup handler for executing worker decrements the worker counter.
+ * This is also called when the the worker is normally shut down.
+ * rgerhards, 2009-07-20
  */
-void
+static void
 wtpWrkrExecCancelCleanup(void *arg)
 {
-	wtp_t *pThis = (wtp_t*) arg;
+	wti_t *pWti = (wti_t*) arg;
+	wtp_t *pThis;

 	BEGINfunc
+	ISOBJ_TYPE_assert(pWti, wti);
+	pThis = pWti->pWtp;
 	ISOBJ_TYPE_assert(pThis, wtp);
-	pThis->iCurNumWrkThrd--;
-	wtpSignalWrkrTermination(pThis);
-	dbgprintf("%s: thread CANCELED with %d workers running.\n", wtpGetDbgHdr(pThis), pThis->iCurNumWrkThrd);
+	/* the order of the next two statements is important! */
+	wtiSetState(pWti, WRKTHRD_STOPPED);
+	ATOMIC_DEC(pThis->iCurNumWrkThrd);
+
+	DBGPRINTF("%s: Worker thread %lx, terminated, num workers now %d\n",
+		  wtpGetDbgHdr(pThis), (unsigned long) pWti, ATOMIC_FETCH_32BIT(pThis->iCurNumWrkThrd));
+
+	pthread_cond_broadcast(&pThis->condThrdTrm); /* activate anyone waiting on thread shutdown */
 ENDfunc
 }
@@ -423,12 +322,11 @@ wtpWorker(void *arg) /* the arg is actually a wti object, even though we are in
 {
 	uchar *pszDbgHdr;
 	uchar thrdName[32] = "rs:";
-	DEFiRet;
-	DEFVARS_mutexProtection;
 	wti_t *pWti = (wti_t*) arg;
 	wtp_t *pThis;
 	sigset_t sigSet;
+	BEGINfunc

 	ISOBJ_TYPE_assert(pWti, wti);
 	pThis = pWti->pWtp;
 	ISOBJ_TYPE_assert(pThis, wtp);
@@ -445,39 +343,9 @@ wtpWorker(void *arg) /* the arg is actually a wti object, even though we are in
 	}
 #	endif

-	BEGIN_MTX_PROTECTED_OPERATIONS(&pThis->mut, LOCK_MUTEX);
-
-	/* do some late initialization */
-
-	pthread_cleanup_push(wtpWrkrExecCancelCleanup, pThis);
-
-	/* finally change to RUNNING state. We need to check if we actually should still run,
-	 * because someone may have requested us to shut down even before we got a chance to do
-	 * our init. That would be a bad race... -- rgerhards, 2008-01-16
-	 */
-	wtiSetState(pWti, eWRKTHRD_RUNNING, 0, MUTEX_ALREADY_LOCKED); /* we are running now! */
-
-	do {
-		END_MTX_PROTECTED_OPERATIONS(&pThis->mut);
-
-		iRet = wtiWorker(pWti); /* just to make sure: this is NOT protected by the mutex! */
-
-		BEGIN_MTX_PROTECTED_OPERATIONS(&pThis->mut, LOCK_MUTEX);
-	} while(pThis->iCurNumWrkThrd == 1 && pThis->bInactivityGuard == 1);
-	/* inactivity guard prevents shutdown of all workers while one should be running due to race
-	 * condition. It can lead to one more worker running than desired, but that is acceptable. After
-	 * all, that worker will shutdown itself due to inactivity timeout. If, however, none were running
-	 * when one was required, processing could come to a halt. -- rgerhards, 2008-01-21
-	 */
-
-	pthread_cleanup_pop(0);
-	pThis->iCurNumWrkThrd--;
-	wtpSignalWrkrTermination(pThis);
-
-	dbgprintf("%s: Worker thread %lx, terminated, num workers now %d\n",
-		  wtpGetDbgHdr(pThis), (unsigned long) pWti, pThis->iCurNumWrkThrd);
-
-	END_MTX_PROTECTED_OPERATIONS(&pThis->mut);
+	pthread_cleanup_push(wtpWrkrExecCancelCleanup, pWti);
+	wtiWorker(pWti);
+	pthread_cleanup_pop(1);

 ENDfunc
 	pthread_exit(0);
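
wtpWorker() now routes all termination bookkeeping through wtpWrkrExecCancelCleanup(): the handler is pushed before wtiWorker() runs and popped with an argument of 1, so it executes on cancellation as well as on a normal return. A self-contained illustration of that pthread_cleanup_push/pop pattern (all names invented):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t mut = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t condTrm = PTHREAD_COND_INITIALIZER;
static int numWorkers = 1;

static void workerCleanup(void *arg)
{
	(void) arg;
	pthread_mutex_lock(&mut);
	--numWorkers;
	pthread_cond_broadcast(&condTrm);	/* wake anyone waiting for termination */
	pthread_mutex_unlock(&mut);
}

static void *worker(void *arg)
{
	pthread_cleanup_push(workerCleanup, arg);
	/* ... the real work loop would run here ... */
	pthread_cleanup_pop(1);	/* 1 = run the handler on normal exit, too */
	return NULL;
}

int main(void)
{
	pthread_t tid;
	pthread_create(&tid, NULL, worker, NULL);
	pthread_join(tid, NULL);
	printf("workers left: %d\n", numWorkers);	/* prints 0 */
	return 0;
}
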
@@ -487,27 +355,20 @@ wtpWorker(void *arg) /* the arg is actually a wti object, even though we are in
 /* start a new worker */
 static rsRetVal
-wtpStartWrkr(wtp_t *pThis, int bLockMutex)
+wtpStartWrkr(wtp_t *pThis)
 {
-	DEFiRet;
-	DEFVARS_mutexProtection;
 	wti_t *pWti;
 	int i;
 	int iState;
+	DEFiRet;

 	ISOBJ_TYPE_assert(pThis, wtp);

-	wtpProcessThrdChanges(pThis); // TODO: Performance: this causes a lot of FUTEX calls
-
-	BEGIN_MTX_PROTECTED_OPERATIONS(&pThis->mut, bLockMutex);
-
-	pThis->iCurNumWrkThrd++;
+	d_pthread_mutex_lock(&pThis->mutWtp);

-	/* find free spot in thread table. If we find at least one worker that is in initialization,
-	 * we do NOT start a new one. Let's give the other one a chance, first.
-	 */
+	/* find free spot in thread table. */
 	for(i = 0 ; i < pThis->iNumWorkerThreads ; ++i) {
-		if(wtiGetState(pThis->pWrkr[i], LOCK_MUTEX) == eWRKTHRD_STOPPED) {
+		if(wtiGetState(pThis->pWrkr[i]) == WRKTHRD_STOPPED) {
 			break;
 		}
 	}
@@ -515,17 +376,20 @@ wtpStartWrkr(wtp_t *pThis, int bLockMutex)
 	if(i == pThis->iNumWorkerThreads)
 		ABORT_FINALIZE(RS_RET_NO_MORE_THREADS);

+	if(i == 0 || pThis->toWrkShutdown == -1) {
+		wtiSetAlwaysRunning(pThis->pWrkr[i]);
+	}
+
 	pWti = pThis->pWrkr[i];
-	wtiSetState(pWti, eWRKTHRD_RUN_CREATED, 0, LOCK_MUTEX);
-	iState = pthread_create(&(pWti->thrdID), NULL, wtpWorker, (void*) pWti);
-	dbgprintf("%s: started with state %d, num workers now %d\n",
-		  wtpGetDbgHdr(pThis), iState, pThis->iCurNumWrkThrd);
+	wtiSetState(pWti, WRKTHRD_RUNNING);
+	iState = pthread_create(&(pWti->thrdID), &pThis->attrThrd, wtpWorker, (void*) pWti);
+	ATOMIC_INC(pThis->iCurNumWrkThrd); /* we got one more! */

-	/* indicate we just started a worker and would like to see it running */
-	wtpSetInactivityGuard(pThis, 1, MUTEX_ALREADY_LOCKED);
+	DBGPRINTF("%s: started with state %d, num workers now %d\n",
+		  wtpGetDbgHdr(pThis), iState, ATOMIC_FETCH_32BIT(pThis->iCurNumWrkThrd));

 finalize_it:
-	END_MTX_PROTECTED_OPERATIONS(&pThis->mut);
+	d_pthread_mutex_unlock(&pThis->mutWtp);
 	RETiRet;
 }

@@ -542,38 +406,34 @@
 rsRetVal
 wtpAdviseMaxWorkers(wtp_t *pThis, int nMaxWrkr)
 {
 	DEFiRet;
-	DEFVARS_mutexProtection;
 	int nMissing; /* number workers missing to run */
 	int i;

 	ISOBJ_TYPE_assert(pThis, wtp);
+int nMaxWrkrTmp = nMaxWrkr;

 	if(nMaxWrkr == 0)
 		FINALIZE;

-	BEGIN_MTX_PROTECTED_OPERATIONS(&pThis->mut, LOCK_MUTEX);
-
 	if(nMaxWrkr > pThis->iNumWorkerThreads) /* limit to configured maximum */
 		nMaxWrkr = pThis->iNumWorkerThreads;

-	nMissing = nMaxWrkr - pThis->iCurNumWrkThrd;
+	nMissing = nMaxWrkr - ATOMIC_FETCH_32BIT(pThis->iCurNumWrkThrd);
+dbgprintf("wtpAdviseMaxWorkers, nmax: %d, curr %d, missing %d\n", nMaxWrkrTmp, pThis->iNumWorkerThreads, nMissing);

 	if(nMissing > 0) {
-		dbgprintf("%s: high activity - starting %d additional worker thread(s).\n", wtpGetDbgHdr(pThis), nMissing);
+		DBGPRINTF("%s: high activity - starting %d additional worker thread(s).\n", wtpGetDbgHdr(pThis), nMissing);
 		/* start the rqtd nbr of workers */
 		for(i = 0 ; i < nMissing ; ++i) {
-			CHKiRet(wtpStartWrkr(pThis, MUTEX_ALREADY_LOCKED));
-		}
-	} else {
-		if(nMaxWrkr > 0) {
-			dbgprintf("wtpAdviseMaxWorkers signals busy\n");
-			wtpWakeupWrkr(pThis);
+			CHKiRet(wtpStartWrkr(pThis));
 		}
+	} else {
+dbgprintf("YYY: adivse signal cond busy");
+		pthread_cond_signal(pThis->pcondBusy);
 	}

 finalize_it:
-	END_MTX_PROTECTED_OPERATIONS(&pThis->mut);
 	RETiRet;
 }

@@ -587,37 +447,16 @@ DEFpropSetMethPTR(wtp, pmutUsr, pthread_mutex_t)
 DEFpropSetMethPTR(wtp, pcondBusy, pthread_cond_t)
 DEFpropSetMethFP(wtp, pfChkStopWrkr, rsRetVal(*pVal)(void*, int))
 DEFpropSetMethFP(wtp, pfRateLimiter, rsRetVal(*pVal)(void*))
-DEFpropSetMethFP(wtp, pfIsIdle, rsRetVal(*pVal)(void*, int))
-DEFpropSetMethFP(wtp, pfDoWork, rsRetVal(*pVal)(void*, void*, int))
+DEFpropSetMethFP(wtp, pfGetDeqBatchSize, rsRetVal(*pVal)(void*, int*))
+DEFpropSetMethFP(wtp, pfIsIdle, rsRetVal(*pVal)(void*, wtp_t*))
+DEFpropSetMethFP(wtp, pfDoWork, rsRetVal(*pVal)(void*, void*))
+DEFpropSetMethFP(wtp, pfObjProcessed, rsRetVal(*pVal)(void*, wti_t*))
 DEFpropSetMethFP(wtp, pfOnIdle, rsRetVal(*pVal)(void*, int))
 DEFpropSetMethFP(wtp, pfOnWorkerCancel, rsRetVal(*pVal)(void*, void*))
 DEFpropSetMethFP(wtp, pfOnWorkerStartup, rsRetVal(*pVal)(void*))
 DEFpropSetMethFP(wtp, pfOnWorkerShutdown, rsRetVal(*pVal)(void*))
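
wtpStartWrkr() now hands the cached attribute set attrThrd to pthread_create(), and the destructor releases it with pthread_attr_destroy(). Together with the removal of the join/"harvesting" code this suggests the workers are created detached; the sketch below shows such a setup, as an assumption rather than a copy of the rsyslog code.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_attr_t attrThrd;	/* initialized once, reused for every worker */

static void *worker(void *arg)
{
	(void) arg;
	/* ... worker loop ... */
	return NULL;
}

int main(void)
{
	pthread_t tid;

	pthread_attr_init(&attrThrd);	/* would happen once, e.g. during construct-finalize */
	pthread_attr_setdetachstate(&attrThrd, PTHREAD_CREATE_DETACHED);

	if(pthread_create(&tid, &attrThrd, worker, NULL) == 0)
		puts("worker started (detached, so no pthread_join needed)");

	sleep(1);	/* crude synchronization for this demo only */
	pthread_attr_destroy(&attrThrd);	/* mirrored by pthread_attr_destroy() in the destructor */
	return 0;
}
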

-/* return the current number of worker threads.
- * TODO: atomic operation would bring a nice performance
- * enhancemcent
- * rgerhards, 2008-01-27
- */
-int
-wtpGetCurNumWrkr(wtp_t *pThis, int bLockMutex)
-{
-	DEFVARS_mutexProtection;
-	int iNumWrkr;
-
-	BEGINfunc
-	ISOBJ_TYPE_assert(pThis, wtp);
-
-	BEGIN_MTX_PROTECTED_OPERATIONS(&pThis->mut, bLockMutex);
-	iNumWrkr = pThis->iCurNumWrkThrd;
-	END_MTX_PROTECTED_OPERATIONS(&pThis->mut);
-
-	ENDfunc
-	return iNumWrkr;
-}
-
-
 /* set the debug header message
  * The passed-in string is duplicated. So if the caller does not need
  * it any longer, it must free it. Must be called only before object is finalized.
@@ -669,6 +508,5 @@ BEGINObjClassInit(wtp, 1, OBJ_IS_CORE_MODULE)
 	CHKiRet(objUse(glbl, CORE_COMPONENT));
 ENDObjClassInit(wtp)

-/*
- * vi:set ai:
+/* vi:set ai:
  */
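
With wtpGetCurNumWrkr() and its mutex gone, the active-worker count is kept consistent purely via ATOMIC_INC/ATOMIC_DEC and read with an atomic fetch. A reduced sketch of that counting scheme, written directly against the GCC builtins that the atomic.h macros wrap (the exact macro expansions are assumed, not quoted):

#include <stdio.h>

static int iCurNumWrkThrd = 0;

static void workerStarted(void)  { (void) __sync_fetch_and_add(&iCurNumWrkThrd, 1); }
static void workerStopped(void)  { (void) __sync_sub_and_fetch(&iCurNumWrkThrd, 1); }
static int  currentWorkers(void) { return __sync_fetch_and_add(&iCurNumWrkThrd, 0); }

int main(void)
{
	workerStarted();
	workerStarted();
	workerStopped();
	printf("current workers: %d\n", currentWorkers());	/* prints 1 */
	return 0;
}
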
diff --git a/runtime/wtp.h b/runtime/wtp.h
index 1ce171cc..0505b91c 100644
--- a/runtime/wtp.h
+++ b/runtime/wtp.h
@@ -27,18 +27,9 @@
 #include <pthread.h>
 #include "obj.h"

-/* commands and states for worker threads. */
-typedef enum {
-	eWRKTHRD_STOPPED = 0,	/* worker thread is not running (either actually never ran or was shut down) */
-	eWRKTHRD_TERMINATING = 1,/* worker thread has shut down, but some finalzing is still needed */
-	/* ALL active states MUST be numerically higher than eWRKTHRD_TERMINATED and NONE must be lower! */
-	eWRKTHRD_RUN_CREATED = 2,/* worker thread has been created, but not yet begun initialization (prob. not yet scheduled) */
-	eWRKTHRD_RUN_INIT = 3,	/* worker thread is initializing, but not yet fully running */
-	eWRKTHRD_RUNNING = 4,	/* worker thread is up and running and shall continue to do so */
-	eWRKTHRD_SHUTDOWN = 5,	/* worker thread is running but shall terminate when wtp is empty */
-	eWRKTHRD_SHUTDOWN_IMMEDIATE = 6/* worker thread is running but shall terminate even if wtp is full */
-	/* SHUTDOWN_IMMEDIATE MUST alsways be the numerically highest state! */
-} qWrkCmd_t;
+/* states for worker threads. */
+#define WRKTHRD_STOPPED FALSE
+#define WRKTHRD_RUNNING TRUE

 /* possible states of a worker thread pool */
@@ -50,36 +41,36 @@ typedef enum {

 /* the worker thread pool (wtp) object */
-typedef struct wtp_s {
+struct wtp_s {
 	BEGINobjInstance;
 	wtpState_t wtpState;
 	int iNumWorkerThreads;/* number of worker threads to use */
 	int iCurNumWrkThrd;/* current number of active worker threads */
 	struct wti_s **pWrkr;/* array with control structure for the worker thread(s) associated with this wtp */
 	int toWrkShutdown; /* timeout for idle workers in ms, -1 means indefinite (0 is immediate) */
-	bool bInactivityGuard;/* prevents inactivity due to race condition */
 	rsRetVal (*pConsumer)(void *); /* user-supplied consumer function for dewtpd messages */
 	/* synchronization variables */
-	pthread_mutex_t mutThrdShutdwn; /* mutex to guard thread shutdown processing */
-	pthread_mutex_t mut; /* mutex for the wtp's thread management */
+	pthread_mutex_t mutWtp; /* mutex for the wtp's thread management */
 	pthread_cond_t condThrdTrm;/* signalled when threads terminate */
-	int bThrdStateChanged; /* at least one thread state has changed if 1 */
 	/* end sync variables */
 	/* user objects */
-	void *pUsr; /* pointer to user object */
+	void *pUsr; /* pointer to user object (in this case, the queue the wtp belongs to) */
+	pthread_attr_t attrThrd;/* attribute for new threads (created just once and cached here) */
 	pthread_mutex_t *pmutUsr;
 	pthread_cond_t *pcondBusy; /* condition the user will signal "busy again, keep runing" on (awakes worker) */
 	rsRetVal (*pfChkStopWrkr)(void *pUsr, int);
+	rsRetVal (*pfGetDeqBatchSize)(void *pUsr, int*); /* obtains max dequeue count from queue config */
+	rsRetVal (*pfObjProcessed)(void *pUsr, wti_t *pWti); /* indicate user object is processed */
 	rsRetVal (*pfRateLimiter)(void *pUsr);
-	rsRetVal (*pfIsIdle)(void *pUsr, int);
-	rsRetVal (*pfDoWork)(void *pUsr, void *pWti, int);
+	rsRetVal (*pfIsIdle)(void *pUsr, wtp_t *pWtp);
+	rsRetVal (*pfDoWork)(void *pUsr, void *pWti);
 	rsRetVal (*pfOnIdle)(void *pUsr, int);
 	rsRetVal (*pfOnWorkerCancel)(void *pUsr, void*pWti);
 	rsRetVal (*pfOnWorkerStartup)(void *pUsr);
 	rsRetVal (*pfOnWorkerShutdown)(void *pUsr);
 	/* end user objects */
 	uchar *pszDbgHdr; /* header string for debug messages */
-} wtp_t;
+};

 /* some symbolic constants for easier reference */
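
The struct keeps pmutUsr/pcondBusy as the wakeup channel between the queue and its workers: the producer signals pcondBusy under the user mutex when work arrives (as wtpAdviseMaxWorkers now does directly), and an idle worker waits on it. A generic sketch of that handshake, with all names invented:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t mutUsr = PTHREAD_MUTEX_INITIALIZER;	/* the "user" (queue) mutex */
static pthread_cond_t condBusy = PTHREAD_COND_INITIALIZER;	/* signalled when work arrives */
static int nPending = 0;					/* queue-size stand-in */

static void enqueue(void)		/* producer side */
{
	pthread_mutex_lock(&mutUsr);
	++nPending;
	pthread_cond_signal(&condBusy);	/* "busy again, keep running" */
	pthread_mutex_unlock(&mutUsr);
}

static void *worker(void *arg)		/* consumer side */
{
	(void) arg;
	pthread_mutex_lock(&mutUsr);
	while(nPending == 0)
		pthread_cond_wait(&condBusy, &mutUsr);
	--nPending;
	pthread_mutex_unlock(&mutUsr);
	puts("worker picked up one item");
	return NULL;
}

int main(void)
{
	pthread_t tid;
	pthread_create(&tid, NULL, worker, NULL);
	enqueue();
	pthread_join(tid, NULL);
	return 0;
}
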
@@ -90,21 +81,19 @@
 rsRetVal wtpConstructFinalize(wtp_t *pThis);
 rsRetVal wtpDestruct(wtp_t **ppThis);
 rsRetVal wtpAdviseMaxWorkers(wtp_t *pThis, int nMaxWrkr);
 rsRetVal wtpProcessThrdChanges(wtp_t *pThis);
-rsRetVal wtpSetInactivityGuard(wtp_t *pThis, int bNewState, int bLockMutex);
-rsRetVal wtpChkStopWrkr(wtp_t *pThis, int bLockMutex, int bLockUsrMutex);
+rsRetVal wtpChkStopWrkr(wtp_t *pThis, int bLockUsrMutex);
 rsRetVal wtpSetState(wtp_t *pThis, wtpState_t iNewState);
-rsRetVal wtpWakeupWrkr(wtp_t *pThis);
 rsRetVal wtpWakeupAllWrkr(wtp_t *pThis);
 rsRetVal wtpCancelAll(wtp_t *pThis);
 rsRetVal wtpSetDbgHdr(wtp_t *pThis, uchar *pszMsg, size_t lenMsg);
-rsRetVal wtpSignalWrkrTermination(wtp_t *pWtp);
 rsRetVal wtpShutdownAll(wtp_t *pThis, wtpState_t tShutdownCmd, struct timespec *ptTimeout);
-int wtpGetCurNumWrkr(wtp_t *pThis, int bLockMutex);
 PROTOTYPEObjClassInit(wtp);
 PROTOTYPEpropSetMethFP(wtp, pfChkStopWrkr, rsRetVal(*pVal)(void*, int));
 PROTOTYPEpropSetMethFP(wtp, pfRateLimiter, rsRetVal(*pVal)(void*));
-PROTOTYPEpropSetMethFP(wtp, pfIsIdle, rsRetVal(*pVal)(void*, int));
-PROTOTYPEpropSetMethFP(wtp, pfDoWork, rsRetVal(*pVal)(void*, void*, int));
+PROTOTYPEpropSetMethFP(wtp, pfGetDeqBatchSize, rsRetVal(*pVal)(void*, int*));
+PROTOTYPEpropSetMethFP(wtp, pfIsIdle, rsRetVal(*pVal)(void*, wtp_t*));
+PROTOTYPEpropSetMethFP(wtp, pfDoWork, rsRetVal(*pVal)(void*, void*));
+PROTOTYPEpropSetMethFP(wtp, pfObjProcessed, rsRetVal(*pVal)(void*, wti_t*));
 PROTOTYPEpropSetMethFP(wtp, pfOnIdle, rsRetVal(*pVal)(void*, int));
 PROTOTYPEpropSetMethFP(wtp, pfOnWorkerCancel, rsRetVal(*pVal)(void*,void*));
 PROTOTYPEpropSetMethFP(wtp, pfOnWorkerStartup, rsRetVal(*pVal)(void*));
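
The pool is driven entirely through the function pointers listed above, which the owning queue installs via the generated property-set methods. The toy below mirrors that wiring pattern in a self-contained way; every name in it (pool_t, poolSet*, myDoWork, ...) is invented for illustration, and the setter macro is only loosely modeled on DEFpropSetMethFP.

#include <stdio.h>

typedef int retval_t;
typedef retval_t (*doWork_fp)(void *pUsr, void *pWti);
typedef retval_t (*chkStop_fp)(void *pUsr, int bLockUsrMutex);

typedef struct pool_s {
	doWork_fp pfDoWork;		/* consumer callback */
	chkStop_fp pfChkStopWrkr;	/* shutdown-check callback */
	void *pUsr;			/* the "user" object, e.g. a queue */
} pool_t;

/* setter generator, loosely modeled on the DEFpropSetMethFP idea */
#define DEF_SETTER(prop, ftype) \
	static void poolSet##prop(pool_t *pThis, ftype pVal) { pThis->prop = pVal; }

DEF_SETTER(pfDoWork, doWork_fp)
DEF_SETTER(pfChkStopWrkr, chkStop_fp)

static retval_t myDoWork(void *pUsr, void *pWti)
{
	(void) pWti;
	printf("processing work for user object %p\n", pUsr);
	return 0;
}

static retval_t myChkStopWrkr(void *pUsr, int bLockUsrMutex)
{
	(void) pUsr; (void) bLockUsrMutex;
	return 0;	/* 0 = keep running */
}

int main(void)
{
	int dummyUsr = 42;
	pool_t pool = { NULL, NULL, &dummyUsr };

	poolSetpfDoWork(&pool, myDoWork);
	poolSetpfChkStopWrkr(&pool, myChkStopWrkr);

	if(pool.pfChkStopWrkr(pool.pUsr, 0) == 0)	/* would be called from the worker loop */
		pool.pfDoWork(pool.pUsr, NULL);
	return 0;
}
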