summaryrefslogtreecommitdiffstats
path: root/ctdb/tests
diff options
context:
space:
mode:
author: Martin Schwenke <martin@meltin.net> 2013-09-04 14:30:04 +1000
committer: Amitay Isaacs <amitay@gmail.com> 2013-09-19 12:54:31 +1000
commit: b33ee7a2a4ac0cde9ebd1038591d8ca9a9867478 (patch)
tree: 475b0a11eeee8599ef10da5b9e40c6fd342f2b8e /ctdb/tests
parent: 1793412de2f2300d08df727f3433deb315f2aae6 (diff)
downloadsamba-b33ee7a2a4ac0cde9ebd1038591d8ca9a9867478.tar.gz
samba-b33ee7a2a4ac0cde9ebd1038591d8ca9a9867478.tar.xz
samba-b33ee7a2a4ac0cde9ebd1038591d8ca9a9867478.zip
recoverd: Fix the implementation of CTDB_SRVID_REBALANCE_NODE
The current implementation has a few flaws:

* A takeover run is called unconditionally when the timer goes off, even if the recovery master role has moved. This means a node other than the recovery master can incorrectly do a takeover run.

* The rebalancing target nodes are cleared in the setup for a takeover run, regardless of whether the takeover run succeeds.

* The timer to force a rebalance isn't cleared if another takeover run occurs before the deadline. Any forced rebalancing will happen in the first takeover run and when the timer expires some time later then an unnecessary takeover run will occur.

* If the recovery master role moves then the rebalancing data will stay on the original node and affect the next takeover run to occur if the recovery master role should come back to the original node.

Instead, store an array of rebalance target nodes in the recovery master context. This is passed as an extra argument to ctdb_takeover_run() each time it is called and is cleared when a takeover run succeeds. The timer hangs off the array of rebalance target nodes, which is cleared if the node isn't the recovery master.

This means that it is possible to lose rebalance data if the recovery master role moves. However, that's a difficult problem to solve. The best way of approaching it is probably to try to stop the recovery master role from jumping around unnecessarily when inactive nodes join the cluster.

The long term solution is to avoid this nonsense completely. The IP allocation algorithm needs to cache state between runs so that it knows which nodes have just become healthy. This also needs recovery master stability.

Signed-off-by: Martin Schwenke <martin@meltin.net>

(This used to be ctdb commit c51c1efe5fc7fa668597f2acd435dee16e410fc9)
Diffstat (limited to 'ctdb/tests')
-rw-r--r--  ctdb/tests/src/ctdb_takeover_tests.c  11
1 files changed, 7 insertions, 4 deletions
diff --git a/ctdb/tests/src/ctdb_takeover_tests.c b/ctdb/tests/src/ctdb_takeover_tests.c
index 1aa0620523..7fd989eaf6 100644
--- a/ctdb/tests/src/ctdb_takeover_tests.c
+++ b/ctdb/tests/src/ctdb_takeover_tests.c
@@ -512,7 +512,8 @@ void ctdb_test_lcp2_allocate_unassigned(const char nodestates[])
ctdb_test_init(nodestates, &ctdb, &all_ips, &ipflags, false);
- lcp2_init(ctdb, ipflags, all_ips, &lcp2_imbalances, &newly_healthy);
+ lcp2_init(ctdb, ipflags, all_ips, NULL,
+ &lcp2_imbalances, &newly_healthy);
lcp2_allocate_unassigned(ctdb, ipflags,
all_ips, lcp2_imbalances);
@@ -534,7 +535,8 @@ void ctdb_test_lcp2_failback(const char nodestates[])
ctdb_test_init(nodestates, &ctdb, &all_ips, &ipflags, false);
- lcp2_init(ctdb, ipflags, all_ips, &lcp2_imbalances, &newly_healthy);
+ lcp2_init(ctdb, ipflags, all_ips, NULL,
+ &lcp2_imbalances, &newly_healthy);
lcp2_failback(ctdb, ipflags,
all_ips, lcp2_imbalances, newly_healthy);
@@ -556,7 +558,8 @@ void ctdb_test_lcp2_failback_loop(const char nodestates[])
ctdb_test_init(nodestates, &ctdb, &all_ips, &ipflags, false);
- lcp2_init(ctdb, ipflags, all_ips, &lcp2_imbalances, &newly_healthy);
+ lcp2_init(ctdb, ipflags, all_ips, NULL,
+ &lcp2_imbalances, &newly_healthy);
lcp2_failback(ctdb, ipflags,
all_ips, lcp2_imbalances, newly_healthy);
@@ -579,7 +582,7 @@ void ctdb_test_ctdb_takeover_run_core(const char nodestates[],
ctdb_test_init(nodestates, &ctdb, &all_ips, &ipflags,
read_ips_for_multiple_nodes);
- ctdb_takeover_run_core(ctdb, ipflags, &all_ips);
+ ctdb_takeover_run_core(ctdb, ipflags, &all_ips, NULL);
print_ctdb_public_ip_list(all_ips);