bugfix and testbench improvement

made shutdown more reliable by making sure that the main queue DA worker is only cancelled if this is actually unavoidable. Also moved down the deletion of rsyslogd's pid file to immediately before termination, so that absence of the file is a proper indication that rsyslogd has finished (in the past, e.g. the testbench accidentally ran two instances as the pid file was deleted too early). Also some improvements to the testbench, namely to handle aborts more intelligently (but still not perfect).
author: Rainer Gerhards <rgerhards@adiscon.com> 2009-10-07 10:53:05 +0200
committer: Rainer Gerhards <rgerhards@adiscon.com> 2009-10-07 10:53:05 +0200
commit: 92ec206279e29d12d3d44e51280485d641579e41 (patch)
tree: 7b8f3c0216f67f7a34c119d27bd9ad9bf339f87b /runtime
parent: caeb203f5188aafccc667c1afc42a49df80d0747 (diff)
download: rsyslog-92ec206279e29d12d3d44e51280485d641579e41.tar.gz
rsyslog-92ec206279e29d12d3d44e51280485d641579e41.tar.xz
rsyslog-92ec206279e29d12d3d44e51280485d641579e41.zip
1 files changed, 20 insertions, 2 deletions
diff --git a/runtime/queue.c b/runtime/queue.c
index cb14b58d..96ebd6d5 100644
--- a/runtime/queue.c
+++ b/runtime/queue.c
@@ -1188,7 +1188,7 @@ tryShutdownWorkersWithinQueueTimeout(qqueue_t *pThis)
 		if(iRetLocal == RS_RET_TIMED_OUT) {
 			DBGOPRINT((obj_t*) pThis, "shutdown timed out on main queue DA worker pool (this is OK)\n");
 		} else {
-			DBGOPRINT((obj_t*) pThis, "main queue DA worker pool shut down.\n");
+			DBGOPRINT((obj_t*) pThis, "main queue DA worker pool shut down on first try.\n");
 		}
 	}
 
@@ -1247,13 +1247,31 @@ tryShutdownWorkersWithinActionTimeout(qqueue_t *pThis)
 			DBGOPRINT((obj_t*) pThis, "unexpected iRet state %d after trying immediate shutdown of the DA "
 				  "queue in disk save mode. Continuing, but results are unpredictable\n", iRetLocal);
 		}
+		/* and now we need to check the DA worker itself (the one that shuffles data to the disk). This
+		 * is necessary because we may be in a situation where the DA queue regular worker and the
+		 * main queue worker stopped rather quickly. In this case, there is almost no time (and
+		 * probably no thread switch!) between the point where we instructed the main queue DA
+		 * worker to shutdown and this code location. In consequence, it may not even have
+		 * noticed that it should shut down, let alone actually done so. So we provide it with a
+		 * fixed 100ms timeout to try to complete its work, which usually should be sufficient.
+		 * rgerhards, 2009-10-06
+		 */
+		timeoutComp(&tTimeout, 100);
+		DBGOPRINT((obj_t*) pThis, "last try for regular shutdown of main queue DA worker pool\n");
+		iRetLocal = wtpShutdownAll(pThis->pWtpDA, wtpState_SHUTDOWN_IMMEDIATE, &tTimeout);
+		if(iRetLocal == RS_RET_TIMED_OUT) {
+			DBGOPRINT((obj_t*) pThis, "shutdown timed out on main queue DA worker pool "
+					          "(this is not good, but probably OK)\n");
+		} else {
+			DBGOPRINT((obj_t*) pThis, "main queue DA worker pool shut down.\n");
+		}
 	}
 
 	RETiRet;
 }
 
 
-/* This function cancels all remenaing regular workers for both the main and the DA
+/* This function cancels all remaining regular workers for both the main and the DA
  * queue. The main queue's DA worker pool continues to run (if it exists and is active).
  * rgerhards, 2009-05-29
  */
author	Rainer Gerhards <rgerhards@adiscon.com>	2009-10-07 10:53:05 +0200
committer	Rainer Gerhards <rgerhards@adiscon.com>	2009-10-07 10:53:05 +0200
commit	92ec206279e29d12d3d44e51280485d641579e41 (patch)
tree	7b8f3c0216f67f7a34c119d27bd9ad9bf339f87b /runtime
parent	caeb203f5188aafccc667c1afc42a49df80d0747 (diff)
download	rsyslog-92ec206279e29d12d3d44e51280485d641579e41.tar.gz rsyslog-92ec206279e29d12d3d44e51280485d641579e41.tar.xz rsyslog-92ec206279e29d12d3d44e51280485d641579e41.zip