daemon: On shutdown, destroy timed events that check if recoverd is active

When CTDB is shutting down, the recovery daemon is stopped, but the timed events that check whether the recovery daemon is still alive are not destroyed. As a result, the recovery daemon is restarted during shutdown if the CTDB daemon takes a long time to shut down. There are two timed-event handlers that check whether the recovery daemon is working: 1. ctdb_check_recd() - which checks every 30 seconds whether the recovery daemon process exists. 2. ctdb_recd_ping_timeout() - which is triggered when the recovery daemon fails to ping the CTDB daemon. Both events are periodic and need to be destroyed when shutting down. Signed-off-by: Amitay Isaacs <amitay@gmail.com> (This used to be ctdb commit 746168df2e691058e601016110fae818c6a265c3)
author: Amitay Isaacs <amitay@gmail.com> 2012-12-04 15:05:44 +1100
committer: Amitay Isaacs <amitay@gmail.com> 2013-01-09 13:20:26 +1100
commit: 30299c387f3c9695afb716b3787035cf7c441333 (patch)
tree: 2c4c65b716a441a7c0014aeefb4beceaf12851f3 /ctdb
parent: cad815164c07c08b0583509553cc53bd3b3c51cc (diff)
download: samba-30299c387f3c9695afb716b3787035cf7c441333.tar.gz
samba-30299c387f3c9695afb716b3787035cf7c441333.tar.xz
samba-30299c387f3c9695afb716b3787035cf7c441333.zip
3 files changed, 15 insertions, 7 deletions
diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h
index 152af64f684..fb541958f4a 100644
--- a/ctdb/include/ctdb_private.h
+++ b/ctdb/include/ctdb_private.h
@@ -508,6 +508,7 @@ struct ctdb_context {
 	bool valgrinding;
 	uint32_t event_script_timeouts; /* counting how many consecutive times an eventscript has timedout */
 	uint32_t *recd_ping_count;
+	TALLOC_CTX *recd_ctx; /* a context used to track recoverd monitoring events */
 	TALLOC_CTX *release_ips_ctx; /* a context used to automatically drop all IPs if we fail to recover the node */
 
 	TALLOC_CTX *event_script_ctx;
diff --git a/ctdb/server/ctdb_recover.c b/ctdb/server/ctdb_recover.c
index 719352a3dbc..32c87bbb561 100644
--- a/ctdb/server/ctdb_recover.c
+++ b/ctdb/server/ctdb_recover.c
@@ -1162,6 +1162,10 @@ int32_t ctdb_control_get_capabilities(struct ctdb_context *ctdb, TDB_DATA *outda
 	return 0;	
 }
 
+/* The recovery daemon will ping us at regular intervals.
+   If we havent been pinged for a while we assume the recovery
+   daemon is inoperable and we restart.
+*/
 static void ctdb_recd_ping_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *p)
 {
 	struct ctdb_context *ctdb = talloc_get_type(p, struct ctdb_context);
@@ -1183,10 +1187,6 @@ static void ctdb_recd_ping_timeout(struct event_context *ev, struct timed_event
 	ctdb_start_recoverd(ctdb);
 }
 
-/* The recovery daemon will ping us at regular intervals.
-   If we havent been pinged for a while we assume the recovery
-   daemon is inoperable and we shut down.
-*/
 int32_t ctdb_control_recd_ping(struct ctdb_context *ctdb)
 {
 	talloc_free(ctdb->recd_ping_count);
diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c
index d50e84e82ee..5f8304447d8 100644
--- a/ctdb/server/ctdb_recoverd.c
+++ b/ctdb/server/ctdb_recoverd.c
@@ -4018,10 +4018,14 @@ int ctdb_start_recoverd(struct ctdb_context *ctdb)
 	if (ctdb->recoverd_pid == -1) {
 		return -1;
 	}
-	
+
 	if (ctdb->recoverd_pid != 0) {
+		talloc_free(ctdb->recd_ctx);
+		ctdb->recd_ctx = talloc_new(ctdb);
+		CTDB_NO_MEMORY(ctdb, ctdb->recd_ctx);
+
 		close(fd[0]);
-		event_add_timed(ctdb->ev, ctdb, 
+		event_add_timed(ctdb->ev, ctdb->recd_ctx,
 				timeval_current_ofs(30, 0),
 				ctdb_check_recd, ctdb);
 		return 0;
@@ -4039,7 +4043,7 @@ int ctdb_start_recoverd(struct ctdb_context *ctdb)
 	DEBUG(DEBUG_DEBUG, (__location__ " Created PIPE FD:%d to recovery daemon\n", fd[0]));
 
 	fde = event_add_fd(ctdb->ev, ctdb, fd[0], EVENT_FD_READ,
-		     ctdb_recoverd_parent, &fd[0]);	
+		     ctdb_recoverd_parent, &fd[0]);
 	tevent_fd_set_auto_close(fde);
 
 	/* set up a handler to pick up sigchld */
@@ -4069,6 +4073,9 @@ void ctdb_stop_recoverd(struct ctdb_context *ctdb)
 
 	DEBUG(DEBUG_NOTICE,("Shutting down recovery daemon\n"));
 	ctdb_kill(ctdb, ctdb->recoverd_pid, SIGTERM);
+
+	TALLOC_FREE(ctdb->recd_ctx);
+	TALLOC_FREE(ctdb->recd_ping_count);
 }
 
 static void ctdb_restart_recd(struct event_context *ev, struct timed_event *te,
author	Amitay Isaacs <amitay@gmail.com>	2012-12-04 15:05:44 +1100
committer	Amitay Isaacs <amitay@gmail.com>	2013-01-09 13:20:26 +1100
commit	30299c387f3c9695afb716b3787035cf7c441333 (patch)
tree	2c4c65b716a441a7c0014aeefb4beceaf12851f3 /ctdb
parent	cad815164c07c08b0583509553cc53bd3b3c51cc (diff)
download	samba-30299c387f3c9695afb716b3787035cf7c441333.tar.gz samba-30299c387f3c9695afb716b3787035cf7c441333.tar.xz samba-30299c387f3c9695afb716b3787035cf7c441333.zip