diff options
-rw-r--r-- | ctdb/include/ctdb_private.h | 4 | ||||
-rw-r--r-- | ctdb/server/ctdb_takeover.c | 77 |
2 files changed, 74 insertions, 7 deletions
diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index cb5b8ca208..94b45c0d85 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -213,6 +213,10 @@ struct ctdb_vnn { TALLOC_CTX *takeover_ctx; struct ctdb_kill_tcp *killtcp; + + /* Set to true any time an update to this VNN is in flight. + This helps to avoid races. */ + bool update_in_flight; }; /* diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index 607dd8987e..b3e98d5f7a 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -373,6 +373,12 @@ static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status, return; } +static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state) +{ + state->vnn->update_in_flight = false; + return 0; +} + /* take over an ip address */ @@ -383,6 +389,14 @@ static int32_t ctdb_do_takeip(struct ctdb_context *ctdb, int ret; struct ctdb_do_takeip_state *state; + if (vnn->update_in_flight) { + DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected " + "update for this IP already in flight\n", + ctdb_addr_to_str(&vnn->public_address), + vnn->public_netmask_bits)); + return -1; + } + ret = ctdb_vnn_assign_iface(ctdb, vnn); if (ret != 0) { DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to " @@ -398,6 +412,9 @@ static int32_t ctdb_do_takeip(struct ctdb_context *ctdb, state->c = talloc_steal(ctdb, c); state->vnn = vnn; + vnn->update_in_flight = true; + talloc_set_destructor(state, ctdb_takeip_destructor); + DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n", ctdb_addr_to_str(&vnn->public_address), vnn->public_netmask_bits, @@ -480,6 +497,12 @@ static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status, return; } +static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state) +{ + state->vnn->update_in_flight = false; + return 0; +} + /* update (move) an ip address */ @@ -492,6 +515,14 @@ static int32_t ctdb_do_updateip(struct ctdb_context *ctdb, struct ctdb_iface *old = vnn->iface; const char *new_name; + if (vnn->update_in_flight) { + DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected " + "update for this IP already in flight\n", + ctdb_addr_to_str(&vnn->public_address), + vnn->public_netmask_bits)); + return -1; + } + ctdb_vnn_unassign_iface(ctdb, vnn); ret = ctdb_vnn_assign_iface(ctdb, vnn); if (ret != 0) { @@ -520,6 +551,9 @@ static int32_t ctdb_do_updateip(struct ctdb_context *ctdb, state->old = old; state->vnn = vnn; + vnn->update_in_flight = true; + talloc_set_destructor(state, ctdb_updateip_destructor); + DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from " "interface %s to %s\n", ctdb_addr_to_str(&vnn->public_address), @@ -782,6 +816,12 @@ static void release_ip_callback(struct ctdb_context *ctdb, int status, talloc_free(state); } +static int ctdb_releaseip_destructor(struct takeover_callback_state *state) +{ + state->vnn->update_in_flight = false; + return 0; +} + /* release an ip address */ @@ -809,8 +849,12 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, talloc_free(vnn->takeover_ctx); vnn->takeover_ctx = NULL; + /* Some ctdb tool commands (e.g. moveip, rebalanceip) send + * lazy multicast to drop an IP from any node that isn't the + * intended new node. The following causes makes ctdbd ignore + * a release for any address it doesn't host. + */ if (ctdb->do_checkpublicip) { - if (!ctdb_sys_have_ip(&pip->addr)) { DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n", ctdb_addr_to_str(&pip->addr), @@ -819,12 +863,6 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, ctdb_vnn_unassign_iface(ctdb, vnn); return 0; } - - iface = ctdb_sys_find_ifname(&pip->addr); - if (iface == NULL) { - DEBUG(DEBUG_ERR, ("Could not find which interface the ip address is hosted on. can not release it\n")); - return 0; - } } else { if (vnn->iface == NULL) { DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n", @@ -832,6 +870,28 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, vnn->public_netmask_bits)); return 0; } + } + + /* There is a potential race between take_ip and us because we + * update the VNN via a callback that run when the + * eventscripts have been run. Avoid the race by allowing one + * update to be in flight at a time. + */ + if (vnn->update_in_flight) { + DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected " + "update for this IP already in flight\n", + ctdb_addr_to_str(&vnn->public_address), + vnn->public_netmask_bits)); + return -1; + } + + if (ctdb->do_checkpublicip) { + iface = ctdb_sys_find_ifname(&pip->addr); + if (iface == NULL) { + DEBUG(DEBUG_ERR, ("Could not find which interface the ip address is hosted on. can not release it\n")); + return 0; + } + } else { iface = strdup(ctdb_vnn_iface_string(vnn)); } @@ -850,6 +910,9 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, *state->addr = pip->addr; state->vnn = vnn; + vnn->update_in_flight = true; + talloc_set_destructor(state, ctdb_releaseip_destructor); + ret = ctdb_event_script_callback(ctdb, state, release_ip_callback, state, false, |