summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin Schwenke <martin@meltin.net>2011-11-01 19:49:38 +1100
committerMartin Schwenke <martin@meltin.net>2011-11-01 21:01:25 +1100
commit98c27f973d2b7814ec6a77ad006926f0be6111d4 (patch)
treea7fa34e1b3aae93149072e56897026ad674f7c60
parent552412d1805a38debb642bd9466126df3b12fb60 (diff)
downloadsamba-98c27f973d2b7814ec6a77ad006926f0be6111d4.tar.gz
samba-98c27f973d2b7814ec6a77ad006926f0be6111d4.tar.xz
samba-98c27f973d2b7814ec6a77ad006926f0be6111d4.zip
LCP IP allocation algorithm - new function lcp2_failback_candidate()
There's a bug in LCP2. Selecting the node with the highest imbalance doesn't always work. Some nodes can have a high imbalance metric simply because they have a lot of IPs. However, these nodes can be part of a group that is perfectly balanced, while nodes in another group with fewer IPs might actually be imbalanced. Factor out the code from lcp2_failback() that takes a given source node and decides which of its addresses should be moved to which destination node, putting it into a new function, lcp2_failback_candidate(). This is the first step in fixing the above bug. Signed-off-by: Martin Schwenke <martin@meltin.net> (This used to be ctdb commit 75718c5768b5bb5c0bcd7dd90e0327c6ed22a63d)
-rw-r--r--ctdb/server/ctdb_takeover.c122
1 files changed, 74 insertions, 48 deletions
diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c
index a7f125041f..ca931f24ab 100644
--- a/ctdb/server/ctdb_takeover.c
+++ b/ctdb/server/ctdb_takeover.c
@@ -1665,57 +1665,26 @@ void lcp2_allocate_unassigned(struct ctdb_context *ctdb,
}
}
-/* LCP2 algorithm for rebalancing the cluster. This finds the source
- * node with the highest LCP2 imbalance, and then determines the best
- * IP/destination node combination to move from the source node.
+/* LCP2 algorithm for rebalancing the cluster. Given a candidate node
+ * to move IPs from, determines the best IP/destination node
+ * combination to move from the source node.
*
* Not static, so we can easily link it into a unit test.
*/
-bool lcp2_failback(struct ctdb_context *ctdb,
- struct ctdb_node_map *nodemap,
- uint32_t mask,
- struct ctdb_public_ip_list *all_ips,
- uint32_t *lcp2_imbalances,
- bool *newly_healthy)
-{
- int srcnode, dstnode, mindstnode, i, num_newly_healthy;
- uint32_t srcimbl, srcdsum, maximbl, dstimbl, dstdsum;
- uint32_t minsrcimbl, mindstimbl, b;
+bool lcp2_failback_candidate(struct ctdb_context *ctdb,
+ struct ctdb_node_map *nodemap,
+ struct ctdb_public_ip_list *all_ips,
+ int srcnode,
+ uint32_t candimbl,
+ uint32_t *lcp2_imbalances,
+ bool *newly_healthy)
+{
+ int dstnode, mindstnode;
+ uint32_t srcimbl, srcdsum, dstimbl, dstdsum;
+ uint32_t minsrcimbl, mindstimbl;
struct ctdb_public_ip_list *minip;
struct ctdb_public_ip_list *tmp_ip;
- /* It is only worth continuing if we have suitable target
- * nodes to transfer IPs to. This check is much cheaper than
- * continuing on...
- */
- num_newly_healthy = 0;
- for (i = 0; i < nodemap->num; i++) {
- if (newly_healthy[i]) {
- num_newly_healthy++;
- }
- }
- if (num_newly_healthy == 0) {
- return false;
- }
-
- /* Get the node with the highest imbalance metric. */
- srcnode = -1;
- maximbl = 0;
- for (i=0; i < nodemap->num; i++) {
- b = lcp2_imbalances[i];
- if ((srcnode == -1) || (b > maximbl)) {
- srcnode = i;
- maximbl = b;
- }
- }
-
- /* This means that all nodes had 0 or 1 addresses, so can't be
- * imbalanced.
- */
- if (maximbl == 0) {
- return false;
- }
-
/* Find an IP and destination node that best reduces imbalance. */
minip = NULL;
minsrcimbl = 0;
@@ -1723,7 +1692,7 @@ bool lcp2_failback(struct ctdb_context *ctdb,
mindstimbl = 0;
DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
- DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES FROM %d [%d]\n", srcnode, maximbl));
+ DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES FROM %d [%d]\n", srcnode, candimbl));
for (tmp_ip=all_ips; tmp_ip; tmp_ip=tmp_ip->next) {
/* Only consider addresses on srcnode. */
@@ -1733,7 +1702,7 @@ bool lcp2_failback(struct ctdb_context *ctdb,
/* What is this IP address costing the source node? */
srcdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, srcnode);
- srcimbl = maximbl - srcdsum;
+ srcimbl = candimbl - srcdsum;
/* Consider this IP address would cost each potential
* destination node. Destination nodes are limited to
@@ -1758,7 +1727,7 @@ bool lcp2_failback(struct ctdb_context *ctdb,
ctdb_addr_to_str(&(tmp_ip->addr)),
dstnode, dstimbl - lcp2_imbalances[dstnode]));
- if ((dstimbl < maximbl) && (dstdsum < srcdsum) && \
+ if ((dstimbl < candimbl) && (dstdsum < srcdsum) && \
((mindstnode == -1) || \
((srcimbl + dstimbl) < (minsrcimbl + mindstimbl)))) {
@@ -1790,6 +1759,63 @@ bool lcp2_failback(struct ctdb_context *ctdb,
}
+/* LCP2 algorithm for rebalancing the cluster. This finds the source
+ * node with the highest LCP2 imbalance, and then determines the best
+ * IP/destination node combination to move from the source node.
+ *
+ * Not static, so we can easily link it into a unit test.
+ */
+bool lcp2_failback(struct ctdb_context *ctdb,
+ struct ctdb_node_map *nodemap,
+ uint32_t mask,
+ struct ctdb_public_ip_list *all_ips,
+ uint32_t *lcp2_imbalances,
+ bool *newly_healthy)
+{
+ int srcnode, i, num_newly_healthy;
+ uint32_t maximbl, b;
+
+ /* It is only worth continuing if we have suitable target
+ * nodes to transfer IPs to. This check is much cheaper than
+ * continuing on...
+ */
+ num_newly_healthy = 0;
+ for (i = 0; i < nodemap->num; i++) {
+ if (newly_healthy[i]) {
+ num_newly_healthy++;
+ }
+ }
+ if (num_newly_healthy == 0) {
+ return false;
+ }
+
+ /* Get the node with the highest imbalance metric. */
+ srcnode = -1;
+ maximbl = 0;
+ for (i=0; i < nodemap->num; i++) {
+ b = lcp2_imbalances[i];
+ if ((srcnode == -1) || (b > maximbl)) {
+ srcnode = i;
+ maximbl = b;
+ }
+ }
+
+ /* This means that all nodes had 0 or 1 addresses, so can't be
+ * imbalanced.
+ */
+ if (maximbl == 0) {
+ return false;
+ }
+
+ return lcp2_failback_candidate(ctdb,
+ nodemap,
+ all_ips,
+ srcnode,
+ maximbl,
+ lcp2_imbalances,
+ newly_healthy);
+}
+
/* The calculation part of the IP allocation algorithm.
* Not static, so we can easily link it into a unit test.
*/