summaryrefslogtreecommitdiffstats
path: root/ctdb/server/ctdb_recoverd.c
diff options
context:
space:
mode:
authorStefan Metzmacher <metze@samba.org>2009-10-09 15:47:49 +0200
committerRonnie Sahlberg <ronniesahlberg@gmail.com>2009-10-26 14:21:45 +1100
commit198866d82d4487f6d45104e051ca94b8072a21f2 (patch)
tree0dcc052af97fedb4f8ceeef9a62ae5ab0977e4d2 /ctdb/server/ctdb_recoverd.c
parent7a616a0d7b5c0b83822a3c0084c0dc82060b546a (diff)
downloadsamba-198866d82d4487f6d45104e051ca94b8072a21f2.tar.gz
samba-198866d82d4487f6d45104e051ca94b8072a21f2.tar.xz
samba-198866d82d4487f6d45104e051ca94b8072a21f2.zip
server: if takeover runs when the recovery master becomes unhealthy
The problem was this: When the monitor event fails, the node->flags get updated, and an update (containing the old and new flags) is sent to the recovery master. If the recovery master sends the update to itself (the same process), it was compairing the node->flags variable with the received new flags. This check always found both flag values to be equal and never sets the rec->need_takeover_run variable to true. There were two problem, first the push_flags_handler() function didn't pass the received old flags. And the ctdb_control_modflags() function ignored the received old flags. metze (This used to be ctdb commit 8ec633b64a05a2d903c2b9639909f15f6375548f)
Diffstat (limited to 'ctdb/server/ctdb_recoverd.c')
-rw-r--r--ctdb/server/ctdb_recoverd.c40
1 files changed, 38 insertions, 2 deletions
diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c
index 6a453f98d8b..ecdcd99dd18 100644
--- a/ctdb/server/ctdb_recoverd.c
+++ b/ctdb/server/ctdb_recoverd.c
@@ -2056,11 +2056,47 @@ static void push_flags_handler(struct ctdb_context *ctdb, uint64_t srvid,
{
int ret;
struct ctdb_node_flag_change *c = (struct ctdb_node_flag_change *)data.dptr;
+ struct ctdb_node_map *nodemap=NULL;
+ TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
+ uint32_t recmaster;
+ uint32_t *nodes;
- ret = ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), c->pnn, c->new_flags, ~c->new_flags);
+ /* find the recovery master */
+ ret = ctdb_ctrl_getrecmaster(ctdb, tmp_ctx, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, &recmaster);
if (ret != 0) {
- DEBUG(DEBUG_ERR, (__location__ " Unable to update nodeflags on remote nodes\n"));
+ DEBUG(DEBUG_ERR, (__location__ " Unable to get recmaster from local node\n"));
+ talloc_free(tmp_ctx);
+ return;
+ }
+
+ /* read the node flags from the recmaster */
+ ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), recmaster, tmp_ctx, &nodemap);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR, (__location__ " Unable to get nodemap from node %u\n", c->pnn));
+ talloc_free(tmp_ctx);
+ return;
}
+ if (c->pnn >= nodemap->num) {
+ DEBUG(DEBUG_ERR,(__location__ " Nodemap from recmaster does not contain node %d\n", c->pnn));
+ talloc_free(tmp_ctx);
+ return;
+ }
+
+ /* send the flags update to all connected nodes */
+ nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
+
+ if (ctdb_client_async_control(ctdb, CTDB_CONTROL_MODIFY_FLAGS,
+ nodes, 0, CONTROL_TIMEOUT(),
+ false, data,
+ NULL, NULL,
+ NULL) != 0) {
+ DEBUG(DEBUG_ERR, (__location__ " ctdb_control to modify node flags failed\n"));
+
+ talloc_free(tmp_ctx);
+ return;
+ }
+
+ talloc_free(tmp_ctx);
}