diff options
author | Ronnie Sahlberg <ronniesahlberg@gmail.com> | 2011-11-08 14:06:30 +1100 |
---|---|---|
committer | Ronnie Sahlberg <ronniesahlberg@gmail.com> | 2011-11-08 14:06:30 +1100 |
commit | 8db9b73920da63c9eaa0822c9dd550714484fa13 (patch) | |
tree | 38ff36f81bee2d74492886ebd7ce0210bcde1c57 /ctdb | |
parent | afd0b6327348554894f62d889dedae21b574106e (diff) | |
parent | cb21f178e32ec32ea927bd992b01c09e12d64d97 (diff) | |
download | samba-8db9b73920da63c9eaa0822c9dd550714484fa13.tar.gz samba-8db9b73920da63c9eaa0822c9dd550714484fa13.tar.xz samba-8db9b73920da63c9eaa0822c9dd550714484fa13.zip |
Merge remote branch 'martins/lcp2fix'
(This used to be ctdb commit 7c02d242af552aa732f5c70ea4eeefbc8a8542e2)
Diffstat (limited to 'ctdb')
-rw-r--r-- | ctdb/server/ctdb_takeover.c | 152 | ||||
-rw-r--r-- | ctdb/tests/src/ctdb_takeover_tests.c | 184 | ||||
-rw-r--r-- | ctdb/tests/takeover/common.sh | 2 | ||||
-rwxr-xr-x | ctdb/tests/takeover/testcases/lcp2.005.sh | 18 | ||||
-rwxr-xr-x | ctdb/tests/takeover/testcases/lcp2.010.sh | 32 |
5 files changed, 318 insertions, 70 deletions
diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index 31946e70ec..2c9cd02bfb 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -1666,57 +1666,26 @@ void lcp2_allocate_unassigned(struct ctdb_context *ctdb, } } -/* LCP2 algorithm for rebalancing the cluster. This finds the source - * node with the highest LCP2 imbalance, and then determines the best - * IP/destination node combination to move from the source node. +/* LCP2 algorithm for rebalancing the cluster. Given a candidate node + * to move IPs from, determines the best IP/destination node + * combination to move from the source node. * * Not static, so we can easily link it into a unit test. */ -bool lcp2_failback(struct ctdb_context *ctdb, - struct ctdb_node_map *nodemap, - uint32_t mask, - struct ctdb_public_ip_list *all_ips, - uint32_t *lcp2_imbalances, - bool *newly_healthy) -{ - int srcnode, dstnode, mindstnode, i, num_newly_healthy; - uint32_t srcimbl, srcdsum, maximbl, dstimbl, dstdsum; - uint32_t minsrcimbl, mindstimbl, b; +bool lcp2_failback_candidate(struct ctdb_context *ctdb, + struct ctdb_node_map *nodemap, + struct ctdb_public_ip_list *all_ips, + int srcnode, + uint32_t candimbl, + uint32_t *lcp2_imbalances, + bool *newly_healthy) +{ + int dstnode, mindstnode; + uint32_t srcimbl, srcdsum, dstimbl, dstdsum; + uint32_t minsrcimbl, mindstimbl; struct ctdb_public_ip_list *minip; struct ctdb_public_ip_list *tmp_ip; - /* It is only worth continuing if we have suitable target - * nodes to transfer IPs to. This check is much cheaper than - * continuing on... - */ - num_newly_healthy = 0; - for (i = 0; i < nodemap->num; i++) { - if (newly_healthy[i]) { - num_newly_healthy++; - } - } - if (num_newly_healthy == 0) { - return false; - } - - /* Get the node with the highest imbalance metric. */ - srcnode = -1; - maximbl = 0; - for (i=0; i < nodemap->num; i++) { - b = lcp2_imbalances[i]; - if ((srcnode == -1) || (b > maximbl)) { - srcnode = i; - maximbl = b; - } - } - - /* This means that all nodes had 0 or 1 addresses, so can't be - * imbalanced. - */ - if (maximbl == 0) { - return false; - } - /* Find an IP and destination node that best reduces imbalance. */ minip = NULL; minsrcimbl = 0; @@ -1724,7 +1693,7 @@ bool lcp2_failback(struct ctdb_context *ctdb, mindstimbl = 0; DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n")); - DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES FROM %d [%d]\n", srcnode, maximbl)); + DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES FROM %d [%d]\n", srcnode, candimbl)); for (tmp_ip=all_ips; tmp_ip; tmp_ip=tmp_ip->next) { /* Only consider addresses on srcnode. */ @@ -1734,7 +1703,7 @@ bool lcp2_failback(struct ctdb_context *ctdb, /* What is this IP address costing the source node? */ srcdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, srcnode); - srcimbl = maximbl - srcdsum; + srcimbl = candimbl - srcdsum; /* Consider this IP address would cost each potential * destination node. Destination nodes are limited to @@ -1759,7 +1728,7 @@ bool lcp2_failback(struct ctdb_context *ctdb, ctdb_addr_to_str(&(tmp_ip->addr)), dstnode, dstimbl - lcp2_imbalances[dstnode])); - if ((dstimbl < maximbl) && (dstdsum < srcdsum) && \ + if ((dstimbl < candimbl) && (dstdsum < srcdsum) && \ ((mindstnode == -1) || \ ((srcimbl + dstimbl) < (minsrcimbl + mindstimbl)))) { @@ -1791,6 +1760,93 @@ bool lcp2_failback(struct ctdb_context *ctdb, } +struct lcp2_imbalance_pnn { + uint32_t imbalance; + int pnn; +}; + +int lcp2_cmp_imbalance_pnn(const void * a, const void * b) +{ + const struct lcp2_imbalance_pnn * lipa = (const struct lcp2_imbalance_pnn *) a; + const struct lcp2_imbalance_pnn * lipb = (const struct lcp2_imbalance_pnn *) b; + + if (lipa->imbalance > lipb->imbalance) { + return -1; + } else if (lipa->imbalance == lipb->imbalance) { + return 0; + } else { + return 1; + } +} + +/* LCP2 algorithm for rebalancing the cluster. This finds the source + * node with the highest LCP2 imbalance, and then determines the best + * IP/destination node combination to move from the source node. + * + * Not static, so we can easily link it into a unit test. + */ +bool lcp2_failback(struct ctdb_context *ctdb, + struct ctdb_node_map *nodemap, + uint32_t mask, + struct ctdb_public_ip_list *all_ips, + uint32_t *lcp2_imbalances, + bool *newly_healthy) +{ + int i, num_newly_healthy; + struct lcp2_imbalance_pnn * lips; + bool ret; + + /* It is only worth continuing if we have suitable target + * nodes to transfer IPs to. This check is much cheaper than + * continuing on... + */ + num_newly_healthy = 0; + for (i = 0; i < nodemap->num; i++) { + if (newly_healthy[i]) { + num_newly_healthy++; + } + } + if (num_newly_healthy == 0) { + return false; + } + + /* Put the imbalances and nodes into an array, sort them and + * iterate through candidates. Usually the 1st one will be + * used, so this doesn't cost much... + */ + lips = talloc_array(ctdb, struct lcp2_imbalance_pnn, nodemap->num); + for (i = 0; i < nodemap->num; i++) { + lips[i].imbalance = lcp2_imbalances[i]; + lips[i].pnn = i; + } + qsort(lips, nodemap->num, sizeof(struct lcp2_imbalance_pnn), + lcp2_cmp_imbalance_pnn); + + ret = false; + for (i = 0; i < nodemap->num; i++) { + /* This means that all nodes had 0 or 1 addresses, so + * can't be imbalanced. + */ + if (lips[i].imbalance == 0) { + break; + } + + if (lcp2_failback_candidate(ctdb, + nodemap, + all_ips, + lips[i].pnn, + lips[i].imbalance, + lcp2_imbalances, + newly_healthy)) { + ret = true; + break; + } + } + + talloc_free(lips); + return ret; +} + /* The calculation part of the IP allocation algorithm. * Not static, so we can easily link it into a unit test. */ diff --git a/ctdb/tests/src/ctdb_takeover_tests.c b/ctdb/tests/src/ctdb_takeover_tests.c index a150018f11..6f83b5994a 100644 --- a/ctdb/tests/src/ctdb_takeover_tests.c +++ b/ctdb/tests/src/ctdb_takeover_tests.c @@ -20,6 +20,10 @@ #include "includes.h" #include "../include/ctdb_private.h" +/* This is lazy... but it is test code! */ +#define CTDB_TEST_MAX_NODES 256 +#define CTDB_TEST_MAX_IPS 256 + /* * Need these, since they're defined in ctdbd.c but we can't link * that. @@ -102,6 +106,157 @@ void ctdb_test_read_ctdb_public_ip_list(void) talloc_free(tmp_ctx); } +/* Format of each line is "IP pnn" - the separator has to be at least + * 1 space (not a tab or whatever - a space!). + */ +static bool +read_ctdb_public_ip_info(TALLOC_CTX *ctx, + int numnodes, + struct ctdb_public_ip_list ** all_ips, + struct ctdb_all_public_ips *** avail) +{ + char line[1024]; + ctdb_sock_addr addr; + char *t, *tok; + struct ctdb_public_ip_list * ta; + int pnn, numips, curr, n, i; + struct ctdb_all_public_ips * a; + + struct ctdb_public_ip_list *last = NULL; + + *avail = talloc_array_size(ctx, sizeof(struct ctdb_all_public_ips *), CTDB_TEST_MAX_NODES); + memset(*avail, 0, + sizeof(struct ctdb_all_public_ips *) * CTDB_TEST_MAX_NODES); + + numips = 0; + *all_ips = NULL; + while (fgets(line, sizeof(line), stdin) != NULL) { + + /* Get rid of pesky newline */ + if ((t = strchr(line, '\n')) != NULL) { + *t = '\0'; + } + + /* Get the IP address */ + tok = strtok(line, " \t"); + if (tok == NULL) { + DEBUG(DEBUG_ERR, (__location__ " WARNING, bad line ignored :%s\n", line)); + continue; + } + + if (!parse_ip(tok, NULL, 0, &addr)) { + DEBUG(DEBUG_ERR, (__location__ " ERROR, bad address :%s\n", tok)); + continue; + } + + numips++; + + /* Get the PNN */ + pnn = -1; + tok = strtok(NULL, " \t"); + if (tok != NULL) { + pnn = (int) strtol(tok, (char **) NULL, 10); + } + + /* Add address + pnn to all_ips */ + if (last == NULL) { + last = talloc(ctx, struct ctdb_public_ip_list); + } else { + last->next = talloc(ctx, struct ctdb_public_ip_list); + last = last->next; + } + last->next = NULL; + last->pnn = pnn; + memcpy(&(last->addr), &addr, sizeof(addr)); + if (*all_ips == NULL) { + *all_ips = last; + } + + tok = strtok(NULL, " \t#"); + if (tok == NULL) { + continue; + } + + /* Handle allowed nodes for addr */ + t = strtok(tok, ","); + while (t != NULL) { + n = (int) strtol(t, (char **) NULL, 10); + if ((*avail)[n] == NULL) { + (*avail)[n] = talloc_array(ctx, struct ctdb_all_public_ips, CTDB_TEST_MAX_IPS); + (*avail)[n]->num = 0; + } + curr = (*avail)[n]->num; + (*avail)[n]->ips[curr].pnn = pnn; + memcpy(&((*avail)[n]->ips[curr].addr), + &addr, sizeof(addr)); + (*avail)[n]->num++; + t = strtok(NULL, ","); + } + + } + + /* Build list of all allowed IPs */ + a = talloc_array(ctx, struct ctdb_all_public_ips, CTDB_TEST_MAX_IPS); + a->num = numips; + for (ta = *all_ips, i=0; ta != NULL && i < numips ; ta = ta->next, i++) { + a->ips[i].pnn = ta->pnn; + memcpy(&(a->ips[i].addr), &(ta->addr), sizeof(ta->addr)); + } + + /* Assign it to any nodes that don't have a list assigned */ + for (n = 0; n < numnodes; n++) { + if ((*avail)[n] == NULL) { + (*avail)[n] = a; + } + } + + return true; +} + +void print_ctdb_available_ips(int numnodes, struct ctdb_all_public_ips **avail) +{ + int n, i; + + for (n = 0; n < numnodes; n++) { + if ((avail[n] != NULL) && (avail[n]->num > 0)) { + printf("%d:", n); + for (i = 0; i < avail[n]->num; i++) { + printf("%s%s", + (i == 0) ? " " : ", ", + ctdb_addr_to_str(&(avail[n]->ips[i].addr))); + } + printf("\n"); + } + } +} + +void ctdb_test_read_ctdb_public_ip_info(const char nodestates[]) +{ + int numnodes; + struct ctdb_public_ip_list *l; + struct ctdb_all_public_ips **avail; + char *tok, *ns; + + TALLOC_CTX *tmp_ctx = talloc_new(NULL); + + /* Avoid that const */ + ns = talloc_strdup(tmp_ctx, nodestates); + + numnodes = 0; + tok = strtok(ns, ","); + while (tok != NULL) { + numnodes++; + tok = strtok(NULL, ","); + } + + read_ctdb_public_ip_info(tmp_ctx, numnodes, &l, &avail); + + print_ctdb_public_ip_list(l); + print_ctdb_available_ips(numnodes, avail); + + talloc_free(tmp_ctx); +} + /* Read 2 IPs from stdin, calculate the IP distance and print it. */ void ctdb_test_ip_distance(void) { @@ -180,11 +335,9 @@ void ctdb_test_init(const char nodestates[], struct ctdb_public_ip_list **all_ips, struct ctdb_node_map **nodemap) { - struct ctdb_public_ip_list *t; - struct ctdb_all_public_ips *available_public_ips; - int i, numips, numnodes; - /* This is test code and this is unreasonably big... :-) */ - uint32_t nodeflags[256]; + struct ctdb_all_public_ips **avail; + int i, numnodes; + uint32_t nodeflags[CTDB_TEST_MAX_NODES]; char *tok, *ns; *ctdb = talloc_zero(NULL, struct ctdb_context); @@ -218,23 +371,10 @@ void ctdb_test_init(const char nodestates[], *nodemap = talloc_array(*ctdb, struct ctdb_node_map, numnodes); (*nodemap)->num = numnodes; - *all_ips = read_ctdb_public_ip_list(*ctdb); - numips = 0; - for (t = *all_ips; t != NULL; t = t->next) { - numips++; - } - - available_public_ips = talloc_array(*ctdb, struct ctdb_all_public_ips, numips); // FIXME: bogus size, overkill - available_public_ips->num = numips; - for (t = *all_ips, i=0; t != NULL && i < numips ; t = t->next, i++) { - available_public_ips->ips[i].pnn = t->pnn; - memcpy(&(available_public_ips->ips[i].addr), &(t->addr), sizeof(t->addr)); - } + read_ctdb_public_ip_info(*ctdb, numnodes, all_ips, &avail); (*ctdb)->nodes = talloc_array(*ctdb, struct ctdb_node *, numnodes); // FIXME: bogus size, overkill - /* Setup both nodemap and ctdb->nodes. Mark all nodes as - * healthy - change this later. */ for (i=0; i < numnodes; i++) { (*nodemap)->nodes[i].pnn = i; (*nodemap)->nodes[i].flags = nodeflags[i]; @@ -243,8 +383,8 @@ void ctdb_test_init(const char nodestates[], (*ctdb)->nodes[i] = talloc(*ctdb, struct ctdb_node); (*ctdb)->nodes[i]->pnn = i; (*ctdb)->nodes[i]->flags = nodeflags[i]; - (*ctdb)->nodes[i]->available_public_ips = available_public_ips; - (*ctdb)->nodes[i]->known_public_ips = available_public_ips; + (*ctdb)->nodes[i]->available_public_ips = avail[i]; + (*ctdb)->nodes[i]->known_public_ips = avail[i]; } } @@ -361,6 +501,8 @@ int main(int argc, const char *argv[]) if (strcmp(argv[1], "ip_list") == 0) { ctdb_test_read_ctdb_public_ip_list(); + } else if (argc == 3 && strcmp(argv[1], "ip_info") == 0) { + ctdb_test_read_ctdb_public_ip_info(argv[2]); } else if (strcmp(argv[1], "ip_distance") == 0) { ctdb_test_ip_distance(); } else if (argc == 4 && strcmp(argv[1], "ip_distance_2_sum") == 0) { diff --git a/ctdb/tests/takeover/common.sh b/ctdb/tests/takeover/common.sh index 8f7fee28ee..716e4d5fc7 100644 --- a/ctdb/tests/takeover/common.sh +++ b/ctdb/tests/takeover/common.sh @@ -15,7 +15,7 @@ define_test () case "$_f" in nondet.*) algorithm="nondet" - CTDB_LCP2="no" + export CTDB_LCP2="no" ;; lcp2.*) algorithm="lcp2" diff --git a/ctdb/tests/takeover/testcases/lcp2.005.sh b/ctdb/tests/takeover/testcases/lcp2.005.sh index e955eab6ed..3dbe532cb6 100755 --- a/ctdb/tests/takeover/testcases/lcp2.005.sh +++ b/ctdb/tests/takeover/testcases/lcp2.005.sh @@ -139,6 +139,24 @@ DATE TIME [PID]: 0 [-29786] -> 192.168.20.253 -> 2 [+45662] DATE TIME [PID]: 0 [-29786] -> 192.168.20.250 -> 0 [+29786] DATE TIME [PID]: 0 [-29786] -> 192.168.20.250 -> 2 [+45915] DATE TIME [PID]: ---------------------------------------- +DATE TIME [PID]: ---------------------------------------- +DATE TIME [PID]: CONSIDERING MOVES FROM 2 [43947] +DATE TIME [PID]: 2 [-28322] -> 192.168.21.254 -> 0 [+44198] +DATE TIME [PID]: 2 [-28322] -> 192.168.21.254 -> 2 [+28322] +DATE TIME [PID]: 2 [-29786] -> 192.168.20.254 -> 0 [+45662] +DATE TIME [PID]: 2 [-29786] -> 192.168.20.254 -> 2 [+29786] +DATE TIME [PID]: 2 [-29786] -> 192.168.20.251 -> 0 [+45915] +DATE TIME [PID]: 2 [-29786] -> 192.168.20.251 -> 2 [+29786] +DATE TIME [PID]: ---------------------------------------- +DATE TIME [PID]: ---------------------------------------- +DATE TIME [PID]: CONSIDERING MOVES FROM 1 [43744] +DATE TIME [PID]: 1 [-28322] -> 192.168.21.252 -> 0 [+44451] +DATE TIME [PID]: 1 [-28322] -> 192.168.21.252 -> 2 [+44198] +DATE TIME [PID]: 1 [-29786] -> 192.168.20.252 -> 0 [+45915] +DATE TIME [PID]: 1 [-29786] -> 192.168.20.252 -> 2 [+45662] +DATE TIME [PID]: 1 [-29786] -> 192.168.20.249 -> 0 [+45662] +DATE TIME [PID]: 1 [-29786] -> 192.168.20.249 -> 2 [+45662] +DATE TIME [PID]: ---------------------------------------- 192.168.21.254 2 192.168.21.253 0 192.168.21.252 1 diff --git a/ctdb/tests/takeover/testcases/lcp2.010.sh b/ctdb/tests/takeover/testcases/lcp2.010.sh new file mode 100755 index 0000000000..c47b6528cd --- /dev/null +++ b/ctdb/tests/takeover/testcases/lcp2.010.sh @@ -0,0 +1,32 @@ +#!/bin/sh + +. "${TAKEOVER_TESTS_DIR}/common.sh" + +define_test "2 disjoint groups of nodes/addresses, a node becomes healthy" + +# This illustrates a bug in LCP2 when the the only candidate for a +# source node is chosen to be the "most imbalanced" node. This means +# that nodes in the smaller group aren't necessarily (depends on sort +# order and addresses used) considered as candidates. If the larger +# group has 6 addresses then the "necessarily" goes away and the +# smaller group won't be rebalanced. + +export CTDB_TEST_LOGLEVEL=0 + +required_result <<EOF +192.168.209.102 3 +192.168.209.101 2 +192.168.140.4 1 +192.168.140.3 1 +192.168.140.2 0 +192.168.140.1 0 +EOF + +simple_test 0,0,0,0 <<EOF +192.168.140.1 0 0,1 +192.168.140.2 0 0,1 +192.168.140.3 1 0,1 +192.168.140.4 1 0,1 +192.168.209.101 2 2,3 +192.168.209.102 2 2,3 +EOF |