server: if takeover runs when the recovery master becomes unhealthy

The problem was this: When the monitor event fails, the node->flags get updated, and an update (containing the old and new flags) is sent to the recovery master. If the recovery master sends the update to itself (the same process), it was comparing the node->flags variable with the received new flags. This check always found both flag values to be equal and never set the rec->need_takeover_run variable to true. There were two problems: first, the push_flags_handler() function didn't pass the received old flags; second, the ctdb_control_modflags() function ignored the received old flags. metze (This used to be ctdb commit 8ec633b64a05a2d903c2b9639909f15f6375548f)
author: Stefan Metzmacher <metze@samba.org> 2009-10-09 15:47:49 +0200
committer: Ronnie Sahlberg <ronniesahlberg@gmail.com> 2009-10-26 14:21:45 +1100
commit: 198866d82d4487f6d45104e051ca94b8072a21f2 (patch)
tree: 0dcc052af97fedb4f8ceeef9a62ae5ab0977e4d2 /ctdb/server/ctdb_recoverd.c
parent: 7a616a0d7b5c0b83822a3c0084c0dc82060b546a (diff)
download: samba-198866d82d4487f6d45104e051ca94b8072a21f2.tar.gz
samba-198866d82d4487f6d45104e051ca94b8072a21f2.tar.xz
samba-198866d82d4487f6d45104e051ca94b8072a21f2.zip
1 files changed, 38 insertions, 2 deletions
diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c
index 6a453f98d8b..ecdcd99dd18 100644
--- a/ctdb/server/ctdb_recoverd.c
+++ b/ctdb/server/ctdb_recoverd.c
@@ -2056,11 +2056,47 @@ static void push_flags_handler(struct ctdb_context *ctdb, uint64_t srvid,
 {
 	int ret;
 	struct ctdb_node_flag_change *c = (struct ctdb_node_flag_change *)data.dptr;
+	struct ctdb_node_map *nodemap=NULL;
+	TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
+	uint32_t recmaster;
+	uint32_t *nodes;
 
-	ret = ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), c->pnn, c->new_flags, ~c->new_flags);
+	/* find the recovery master */
+	ret = ctdb_ctrl_getrecmaster(ctdb, tmp_ctx, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, &recmaster);
 	if (ret != 0) {
-		DEBUG(DEBUG_ERR, (__location__ " Unable to update nodeflags on remote nodes\n"));
+		DEBUG(DEBUG_ERR, (__location__ " Unable to get recmaster from local node\n"));
+		talloc_free(tmp_ctx);
+		return;
+	}
+
+	/* read the node flags from the recmaster */
+	ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), recmaster, tmp_ctx, &nodemap);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " Unable to get nodemap from node %u\n", c->pnn));
+		talloc_free(tmp_ctx);
+		return;
 	}
+	if (c->pnn >= nodemap->num) {
+		DEBUG(DEBUG_ERR,(__location__ " Nodemap from recmaster does not contain node %d\n", c->pnn));
+		talloc_free(tmp_ctx);
+		return;
+	}
+
+	/* send the flags update to all connected nodes */
+	nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
+
+	if (ctdb_client_async_control(ctdb, CTDB_CONTROL_MODIFY_FLAGS,
+				      nodes, 0, CONTROL_TIMEOUT(),
+				      false, data,
+				      NULL, NULL,
+				      NULL) != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " ctdb_control to modify node flags failed\n"));
+
+		talloc_free(tmp_ctx);
+		return;
+	}
+
+	talloc_free(tmp_ctx);
 }
author	Stefan Metzmacher <metze@samba.org>	2009-10-09 15:47:49 +0200
committer	Ronnie Sahlberg <ronniesahlberg@gmail.com>	2009-10-26 14:21:45 +1100
commit	198866d82d4487f6d45104e051ca94b8072a21f2 (patch)
tree	0dcc052af97fedb4f8ceeef9a62ae5ab0977e4d2 /ctdb/server/ctdb_recoverd.c
parent	7a616a0d7b5c0b83822a3c0084c0dc82060b546a (diff)
download	samba-198866d82d4487f6d45104e051ca94b8072a21f2.tar.gz samba-198866d82d4487f6d45104e051ca94b8072a21f2.tar.xz samba-198866d82d4487f6d45104e051ca94b8072a21f2.zip