summaryrefslogtreecommitdiffstats
path: root/ctdb
diff options
context:
space:
mode:
authorRonnie Sahlberg <ronniesahlberg@gmail.com>2011-11-08 14:06:30 +1100
committerRonnie Sahlberg <ronniesahlberg@gmail.com>2011-11-08 14:06:30 +1100
commit8db9b73920da63c9eaa0822c9dd550714484fa13 (patch)
tree38ff36f81bee2d74492886ebd7ce0210bcde1c57 /ctdb
parentafd0b6327348554894f62d889dedae21b574106e (diff)
parentcb21f178e32ec32ea927bd992b01c09e12d64d97 (diff)
downloadsamba-8db9b73920da63c9eaa0822c9dd550714484fa13.tar.gz
samba-8db9b73920da63c9eaa0822c9dd550714484fa13.tar.xz
samba-8db9b73920da63c9eaa0822c9dd550714484fa13.zip
Merge remote branch 'martins/lcp2fix'
(This used to be ctdb commit 7c02d242af552aa732f5c70ea4eeefbc8a8542e2)
Diffstat (limited to 'ctdb')
-rw-r--r--ctdb/server/ctdb_takeover.c152
-rw-r--r--ctdb/tests/src/ctdb_takeover_tests.c184
-rw-r--r--ctdb/tests/takeover/common.sh2
-rwxr-xr-xctdb/tests/takeover/testcases/lcp2.005.sh18
-rwxr-xr-xctdb/tests/takeover/testcases/lcp2.010.sh32
5 files changed, 318 insertions, 70 deletions
diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c
index 31946e70ec..2c9cd02bfb 100644
--- a/ctdb/server/ctdb_takeover.c
+++ b/ctdb/server/ctdb_takeover.c
@@ -1666,57 +1666,26 @@ void lcp2_allocate_unassigned(struct ctdb_context *ctdb,
}
}
-/* LCP2 algorithm for rebalancing the cluster. This finds the source
- * node with the highest LCP2 imbalance, and then determines the best
- * IP/destination node combination to move from the source node.
+/* LCP2 algorithm for rebalancing the cluster. Given a candidate node
+ * to move IPs from, determines the best IP/destination node
+ * combination to move from the source node.
*
* Not static, so we can easily link it into a unit test.
*/
-bool lcp2_failback(struct ctdb_context *ctdb,
- struct ctdb_node_map *nodemap,
- uint32_t mask,
- struct ctdb_public_ip_list *all_ips,
- uint32_t *lcp2_imbalances,
- bool *newly_healthy)
-{
- int srcnode, dstnode, mindstnode, i, num_newly_healthy;
- uint32_t srcimbl, srcdsum, maximbl, dstimbl, dstdsum;
- uint32_t minsrcimbl, mindstimbl, b;
+bool lcp2_failback_candidate(struct ctdb_context *ctdb,
+ struct ctdb_node_map *nodemap,
+ struct ctdb_public_ip_list *all_ips,
+ int srcnode,
+ uint32_t candimbl,
+ uint32_t *lcp2_imbalances,
+ bool *newly_healthy)
+{
+ int dstnode, mindstnode;
+ uint32_t srcimbl, srcdsum, dstimbl, dstdsum;
+ uint32_t minsrcimbl, mindstimbl;
struct ctdb_public_ip_list *minip;
struct ctdb_public_ip_list *tmp_ip;
- /* It is only worth continuing if we have suitable target
- * nodes to transfer IPs to. This check is much cheaper than
- * continuing on...
- */
- num_newly_healthy = 0;
- for (i = 0; i < nodemap->num; i++) {
- if (newly_healthy[i]) {
- num_newly_healthy++;
- }
- }
- if (num_newly_healthy == 0) {
- return false;
- }
-
- /* Get the node with the highest imbalance metric. */
- srcnode = -1;
- maximbl = 0;
- for (i=0; i < nodemap->num; i++) {
- b = lcp2_imbalances[i];
- if ((srcnode == -1) || (b > maximbl)) {
- srcnode = i;
- maximbl = b;
- }
- }
-
- /* This means that all nodes had 0 or 1 addresses, so can't be
- * imbalanced.
- */
- if (maximbl == 0) {
- return false;
- }
-
/* Find an IP and destination node that best reduces imbalance. */
minip = NULL;
minsrcimbl = 0;
@@ -1724,7 +1693,7 @@ bool lcp2_failback(struct ctdb_context *ctdb,
mindstimbl = 0;
DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
- DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES FROM %d [%d]\n", srcnode, maximbl));
+ DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES FROM %d [%d]\n", srcnode, candimbl));
for (tmp_ip=all_ips; tmp_ip; tmp_ip=tmp_ip->next) {
/* Only consider addresses on srcnode. */
@@ -1734,7 +1703,7 @@ bool lcp2_failback(struct ctdb_context *ctdb,
/* What is this IP address costing the source node? */
srcdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, srcnode);
- srcimbl = maximbl - srcdsum;
+ srcimbl = candimbl - srcdsum;
/* Consider this IP address would cost each potential
* destination node. Destination nodes are limited to
@@ -1759,7 +1728,7 @@ bool lcp2_failback(struct ctdb_context *ctdb,
ctdb_addr_to_str(&(tmp_ip->addr)),
dstnode, dstimbl - lcp2_imbalances[dstnode]));
- if ((dstimbl < maximbl) && (dstdsum < srcdsum) && \
+ if ((dstimbl < candimbl) && (dstdsum < srcdsum) && \
((mindstnode == -1) || \
((srcimbl + dstimbl) < (minsrcimbl + mindstimbl)))) {
@@ -1791,6 +1760,93 @@ bool lcp2_failback(struct ctdb_context *ctdb,
}
+struct lcp2_imbalance_pnn {
+ uint32_t imbalance;
+ int pnn;
+};
+
+int lcp2_cmp_imbalance_pnn(const void * a, const void * b)
+{
+ const struct lcp2_imbalance_pnn * lipa = (const struct lcp2_imbalance_pnn *) a;
+ const struct lcp2_imbalance_pnn * lipb = (const struct lcp2_imbalance_pnn *) b;
+
+ if (lipa->imbalance > lipb->imbalance) {
+ return -1;
+ } else if (lipa->imbalance == lipb->imbalance) {
+ return 0;
+ } else {
+ return 1;
+ }
+}
+
+/* LCP2 algorithm for rebalancing the cluster. This finds the source
+ * node with the highest LCP2 imbalance, and then determines the best
+ * IP/destination node combination to move from the source node.
+ *
+ * Not static, so we can easily link it into a unit test.
+ */
+bool lcp2_failback(struct ctdb_context *ctdb,
+ struct ctdb_node_map *nodemap,
+ uint32_t mask,
+ struct ctdb_public_ip_list *all_ips,
+ uint32_t *lcp2_imbalances,
+ bool *newly_healthy)
+{
+ int i, num_newly_healthy;
+ struct lcp2_imbalance_pnn * lips;
+ bool ret;
+
+ /* It is only worth continuing if we have suitable target
+ * nodes to transfer IPs to. This check is much cheaper than
+ * continuing on...
+ */
+ num_newly_healthy = 0;
+ for (i = 0; i < nodemap->num; i++) {
+ if (newly_healthy[i]) {
+ num_newly_healthy++;
+ }
+ }
+ if (num_newly_healthy == 0) {
+ return false;
+ }
+
+ /* Put the imbalances and nodes into an array, sort them and
+ * iterate through candidates. Usually the 1st one will be
+ * used, so this doesn't cost much...
+ */
+ lips = talloc_array(ctdb, struct lcp2_imbalance_pnn, nodemap->num);
+ for (i = 0; i < nodemap->num; i++) {
+ lips[i].imbalance = lcp2_imbalances[i];
+ lips[i].pnn = i;
+ }
+ qsort(lips, nodemap->num, sizeof(struct lcp2_imbalance_pnn),
+ lcp2_cmp_imbalance_pnn);
+
+ ret = false;
+ for (i = 0; i < nodemap->num; i++) {
+ /* This means that all nodes had 0 or 1 addresses, so
+ * can't be imbalanced.
+ */
+ if (lips[i].imbalance == 0) {
+ break;
+ }
+
+ if (lcp2_failback_candidate(ctdb,
+ nodemap,
+ all_ips,
+ lips[i].pnn,
+ lips[i].imbalance,
+ lcp2_imbalances,
+ newly_healthy)) {
+ ret = true;
+ break;
+ }
+ }
+
+ talloc_free(lips);
+ return ret;
+}
+
/* The calculation part of the IP allocation algorithm.
* Not static, so we can easily link it into a unit test.
*/
diff --git a/ctdb/tests/src/ctdb_takeover_tests.c b/ctdb/tests/src/ctdb_takeover_tests.c
index a150018f11..6f83b5994a 100644
--- a/ctdb/tests/src/ctdb_takeover_tests.c
+++ b/ctdb/tests/src/ctdb_takeover_tests.c
@@ -20,6 +20,10 @@
#include "includes.h"
#include "../include/ctdb_private.h"
+/* This is lazy... but it is test code! */
+#define CTDB_TEST_MAX_NODES 256
+#define CTDB_TEST_MAX_IPS 256
+
/*
* Need these, since they're defined in ctdbd.c but we can't link
* that.
@@ -102,6 +106,157 @@ void ctdb_test_read_ctdb_public_ip_list(void)
talloc_free(tmp_ctx);
}
+/* Format of each line is "IP pnn" - the separator has to be at least
+ * 1 space (not a tab or whatever - a space!).
+ */
+static bool
+read_ctdb_public_ip_info(TALLOC_CTX *ctx,
+ int numnodes,
+ struct ctdb_public_ip_list ** all_ips,
+ struct ctdb_all_public_ips *** avail)
+{
+ char line[1024];
+ ctdb_sock_addr addr;
+ char *t, *tok;
+ struct ctdb_public_ip_list * ta;
+ int pnn, numips, curr, n, i;
+ struct ctdb_all_public_ips * a;
+
+ struct ctdb_public_ip_list *last = NULL;
+
+ *avail = talloc_array_size(ctx, sizeof(struct ctdb_all_public_ips *), CTDB_TEST_MAX_NODES);
+ memset(*avail, 0,
+ sizeof(struct ctdb_all_public_ips *) * CTDB_TEST_MAX_NODES);
+
+ numips = 0;
+ *all_ips = NULL;
+ while (fgets(line, sizeof(line), stdin) != NULL) {
+
+ /* Get rid of pesky newline */
+ if ((t = strchr(line, '\n')) != NULL) {
+ *t = '\0';
+ }
+
+ /* Get the IP address */
+ tok = strtok(line, " \t");
+ if (tok == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " WARNING, bad line ignored :%s\n", line));
+ continue;
+ }
+
+ if (!parse_ip(tok, NULL, 0, &addr)) {
+ DEBUG(DEBUG_ERR, (__location__ " ERROR, bad address :%s\n", tok));
+ continue;
+ }
+
+ numips++;
+
+ /* Get the PNN */
+ pnn = -1;
+ tok = strtok(NULL, " \t");
+ if (tok != NULL) {
+ pnn = (int) strtol(tok, (char **) NULL, 10);
+ }
+
+ /* Add address + pnn to all_ips */
+ if (last == NULL) {
+ last = talloc(ctx, struct ctdb_public_ip_list);
+ } else {
+ last->next = talloc(ctx, struct ctdb_public_ip_list);
+ last = last->next;
+ }
+ last->next = NULL;
+ last->pnn = pnn;
+ memcpy(&(last->addr), &addr, sizeof(addr));
+ if (*all_ips == NULL) {
+ *all_ips = last;
+ }
+
+ tok = strtok(NULL, " \t#");
+ if (tok == NULL) {
+ continue;
+ }
+
+ /* Handle allowed nodes for addr */
+ t = strtok(tok, ",");
+ while (t != NULL) {
+ n = (int) strtol(t, (char **) NULL, 10);
+ if ((*avail)[n] == NULL) {
+ (*avail)[n] = talloc_array(ctx, struct ctdb_all_public_ips, CTDB_TEST_MAX_IPS);
+ (*avail)[n]->num = 0;
+ }
+ curr = (*avail)[n]->num;
+ (*avail)[n]->ips[curr].pnn = pnn;
+ memcpy(&((*avail)[n]->ips[curr].addr),
+ &addr, sizeof(addr));
+ (*avail)[n]->num++;
+ t = strtok(NULL, ",");
+ }
+
+ }
+
+ /* Build list of all allowed IPs */
+ a = talloc_array(ctx, struct ctdb_all_public_ips, CTDB_TEST_MAX_IPS);
+ a->num = numips;
+ for (ta = *all_ips, i=0; ta != NULL && i < numips ; ta = ta->next, i++) {
+ a->ips[i].pnn = ta->pnn;
+ memcpy(&(a->ips[i].addr), &(ta->addr), sizeof(ta->addr));
+ }
+
+ /* Assign it to any nodes that don't have a list assigned */
+ for (n = 0; n < numnodes; n++) {
+ if ((*avail)[n] == NULL) {
+ (*avail)[n] = a;
+ }
+ }
+
+ return true;
+}
+
+void print_ctdb_available_ips(int numnodes, struct ctdb_all_public_ips **avail)
+{
+ int n, i;
+
+ for (n = 0; n < numnodes; n++) {
+ if ((avail[n] != NULL) && (avail[n]->num > 0)) {
+ printf("%d:", n);
+ for (i = 0; i < avail[n]->num; i++) {
+ printf("%s%s",
+ (i == 0) ? " " : ", ",
+ ctdb_addr_to_str(&(avail[n]->ips[i].addr)));
+ }
+ printf("\n");
+ }
+ }
+}
+
+void ctdb_test_read_ctdb_public_ip_info(const char nodestates[])
+{
+ int numnodes;
+ struct ctdb_public_ip_list *l;
+ struct ctdb_all_public_ips **avail;
+ char *tok, *ns;
+
+ TALLOC_CTX *tmp_ctx = talloc_new(NULL);
+
+ /* Avoid that const */
+ ns = talloc_strdup(tmp_ctx, nodestates);
+
+ numnodes = 0;
+ tok = strtok(ns, ",");
+ while (tok != NULL) {
+ numnodes++;
+ tok = strtok(NULL, ",");
+ }
+
+ read_ctdb_public_ip_info(tmp_ctx, numnodes, &l, &avail);
+
+ print_ctdb_public_ip_list(l);
+ print_ctdb_available_ips(numnodes, avail);
+
+ talloc_free(tmp_ctx);
+}
+
/* Read 2 IPs from stdin, calculate the IP distance and print it. */
void ctdb_test_ip_distance(void)
{
@@ -180,11 +335,9 @@ void ctdb_test_init(const char nodestates[],
struct ctdb_public_ip_list **all_ips,
struct ctdb_node_map **nodemap)
{
- struct ctdb_public_ip_list *t;
- struct ctdb_all_public_ips *available_public_ips;
- int i, numips, numnodes;
- /* This is test code and this is unreasonably big... :-) */
- uint32_t nodeflags[256];
+ struct ctdb_all_public_ips **avail;
+ int i, numnodes;
+ uint32_t nodeflags[CTDB_TEST_MAX_NODES];
char *tok, *ns;
*ctdb = talloc_zero(NULL, struct ctdb_context);
@@ -218,23 +371,10 @@ void ctdb_test_init(const char nodestates[],
*nodemap = talloc_array(*ctdb, struct ctdb_node_map, numnodes);
(*nodemap)->num = numnodes;
- *all_ips = read_ctdb_public_ip_list(*ctdb);
- numips = 0;
- for (t = *all_ips; t != NULL; t = t->next) {
- numips++;
- }
-
- available_public_ips = talloc_array(*ctdb, struct ctdb_all_public_ips, numips); // FIXME: bogus size, overkill
- available_public_ips->num = numips;
- for (t = *all_ips, i=0; t != NULL && i < numips ; t = t->next, i++) {
- available_public_ips->ips[i].pnn = t->pnn;
- memcpy(&(available_public_ips->ips[i].addr), &(t->addr), sizeof(t->addr));
- }
+ read_ctdb_public_ip_info(*ctdb, numnodes, all_ips, &avail);
(*ctdb)->nodes = talloc_array(*ctdb, struct ctdb_node *, numnodes); // FIXME: bogus size, overkill
- /* Setup both nodemap and ctdb->nodes. Mark all nodes as
- * healthy - change this later. */
for (i=0; i < numnodes; i++) {
(*nodemap)->nodes[i].pnn = i;
(*nodemap)->nodes[i].flags = nodeflags[i];
@@ -243,8 +383,8 @@ void ctdb_test_init(const char nodestates[],
(*ctdb)->nodes[i] = talloc(*ctdb, struct ctdb_node);
(*ctdb)->nodes[i]->pnn = i;
(*ctdb)->nodes[i]->flags = nodeflags[i];
- (*ctdb)->nodes[i]->available_public_ips = available_public_ips;
- (*ctdb)->nodes[i]->known_public_ips = available_public_ips;
+ (*ctdb)->nodes[i]->available_public_ips = avail[i];
+ (*ctdb)->nodes[i]->known_public_ips = avail[i];
}
}
@@ -361,6 +501,8 @@ int main(int argc, const char *argv[])
if (strcmp(argv[1], "ip_list") == 0) {
ctdb_test_read_ctdb_public_ip_list();
+ } else if (argc == 3 && strcmp(argv[1], "ip_info") == 0) {
+ ctdb_test_read_ctdb_public_ip_info(argv[2]);
} else if (strcmp(argv[1], "ip_distance") == 0) {
ctdb_test_ip_distance();
} else if (argc == 4 && strcmp(argv[1], "ip_distance_2_sum") == 0) {
diff --git a/ctdb/tests/takeover/common.sh b/ctdb/tests/takeover/common.sh
index 8f7fee28ee..716e4d5fc7 100644
--- a/ctdb/tests/takeover/common.sh
+++ b/ctdb/tests/takeover/common.sh
@@ -15,7 +15,7 @@ define_test ()
case "$_f" in
nondet.*)
algorithm="nondet"
- CTDB_LCP2="no"
+ export CTDB_LCP2="no"
;;
lcp2.*)
algorithm="lcp2"
diff --git a/ctdb/tests/takeover/testcases/lcp2.005.sh b/ctdb/tests/takeover/testcases/lcp2.005.sh
index e955eab6ed..3dbe532cb6 100755
--- a/ctdb/tests/takeover/testcases/lcp2.005.sh
+++ b/ctdb/tests/takeover/testcases/lcp2.005.sh
@@ -139,6 +139,24 @@ DATE TIME [PID]: 0 [-29786] -> 192.168.20.253 -> 2 [+45662]
DATE TIME [PID]: 0 [-29786] -> 192.168.20.250 -> 0 [+29786]
DATE TIME [PID]: 0 [-29786] -> 192.168.20.250 -> 2 [+45915]
DATE TIME [PID]: ----------------------------------------
+DATE TIME [PID]: ----------------------------------------
+DATE TIME [PID]: CONSIDERING MOVES FROM 2 [43947]
+DATE TIME [PID]: 2 [-28322] -> 192.168.21.254 -> 0 [+44198]
+DATE TIME [PID]: 2 [-28322] -> 192.168.21.254 -> 2 [+28322]
+DATE TIME [PID]: 2 [-29786] -> 192.168.20.254 -> 0 [+45662]
+DATE TIME [PID]: 2 [-29786] -> 192.168.20.254 -> 2 [+29786]
+DATE TIME [PID]: 2 [-29786] -> 192.168.20.251 -> 0 [+45915]
+DATE TIME [PID]: 2 [-29786] -> 192.168.20.251 -> 2 [+29786]
+DATE TIME [PID]: ----------------------------------------
+DATE TIME [PID]: ----------------------------------------
+DATE TIME [PID]: CONSIDERING MOVES FROM 1 [43744]
+DATE TIME [PID]: 1 [-28322] -> 192.168.21.252 -> 0 [+44451]
+DATE TIME [PID]: 1 [-28322] -> 192.168.21.252 -> 2 [+44198]
+DATE TIME [PID]: 1 [-29786] -> 192.168.20.252 -> 0 [+45915]
+DATE TIME [PID]: 1 [-29786] -> 192.168.20.252 -> 2 [+45662]
+DATE TIME [PID]: 1 [-29786] -> 192.168.20.249 -> 0 [+45662]
+DATE TIME [PID]: 1 [-29786] -> 192.168.20.249 -> 2 [+45662]
+DATE TIME [PID]: ----------------------------------------
192.168.21.254 2
192.168.21.253 0
192.168.21.252 1
diff --git a/ctdb/tests/takeover/testcases/lcp2.010.sh b/ctdb/tests/takeover/testcases/lcp2.010.sh
new file mode 100755
index 0000000000..c47b6528cd
--- /dev/null
+++ b/ctdb/tests/takeover/testcases/lcp2.010.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+
+. "${TAKEOVER_TESTS_DIR}/common.sh"
+
+define_test "2 disjoint groups of nodes/addresses, a node becomes healthy"
+
+# This illustrates a bug in LCP2 when the the only candidate for a
+# source node is chosen to be the "most imbalanced" node. This means
+# that nodes in the smaller group aren't necessarily (depends on sort
+# order and addresses used) considered as candidates. If the larger
+# group has 6 addresses then the "necessarily" goes away and the
+# smaller group won't be rebalanced.
+
+export CTDB_TEST_LOGLEVEL=0
+
+required_result <<EOF
+192.168.209.102 3
+192.168.209.101 2
+192.168.140.4 1
+192.168.140.3 1
+192.168.140.2 0
+192.168.140.1 0
+EOF
+
+simple_test 0,0,0,0 <<EOF
+192.168.140.1 0 0,1
+192.168.140.2 0 0,1
+192.168.140.3 1 0,1
+192.168.140.4 1 0,1
+192.168.209.101 2 2,3
+192.168.209.102 2 2,3
+EOF