summaryrefslogtreecommitdiffstats
path: root/ctdb/server/ctdb_recoverd.c
diff options
context:
space:
mode:
authorMartin Schwenke <martin@meltin.net>2013-09-17 12:00:26 +1000
committerAmitay Isaacs <amitay@gmail.com>2013-09-19 12:54:29 +1000
commit30a50c6e1e3047e89b4b88693a0f1e8ad9e6f6a0 (patch)
tree2fd8dc6c4b9dc111e3e0ff0b9bbbbcf38a05ea07 /ctdb/server/ctdb_recoverd.c
parent630196423a112a35187f82d704730cb7d847bf3d (diff)
downloadsamba-30a50c6e1e3047e89b4b88693a0f1e8ad9e6f6a0.tar.gz
samba-30a50c6e1e3047e89b4b88693a0f1e8ad9e6f6a0.tar.xz
samba-30a50c6e1e3047e89b4b88693a0f1e8ad9e6f6a0.zip
recoverd: Stabilise the recovery master role
On rare occasions a node that has been inactive will trigger an election when it becomes active again. If that node has been up for the longest time then it will win the election and the recovery master role will spuriously move. While a node remains inactive we reset the priority time to discourage it from winning elections. The priority time will now reflect roughly how long the node has been active rather than how long it has been up. That means the most stable node is more likely to win elections. Having a stable recovery master means that disabling takeover runs while reloading IPs is more likely to succeed. It also improves the chances of being able to cache information in the recovery master - for example, between takeover runs. Signed-off-by: Martin Schwenke <martin@meltin.net> (This used to be ctdb commit f0f48f22f45e4c82eba2582efae307e25385de81)
Diffstat (limited to 'ctdb/server/ctdb_recoverd.c')
-rw-r--r--ctdb/server/ctdb_recoverd.c8
1 files changed, 8 insertions, 0 deletions
diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c
index da88f16cab4..8df59be68ca 100644
--- a/ctdb/server/ctdb_recoverd.c
+++ b/ctdb/server/ctdb_recoverd.c
@@ -3442,6 +3442,14 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
also frozen and that the recmode is set to active.
*/
if (rec->node_flags & (NODE_FLAGS_STOPPED | NODE_FLAGS_BANNED)) {
+ /* If this node has become inactive then we want to
+ * reduce the chances of it taking over the recovery
+ * master role when it becomes active again. This
+ * helps to stabilise the recovery master role so that
+ * it stays on the most stable node.
+ */
+ rec->priority_time = timeval_current();
+
ret = ctdb_ctrl_getrecmode(ctdb, mem_ctx, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, &ctdb->recovery_mode);
if (ret != 0) {
DEBUG(DEBUG_ERR,(__location__ " Failed to read recmode from local node\n"));