diff options
author | Andrew Tridgell <tridge@samba.org> | 2007-12-03 10:19:24 +1100 |
---|---|---|
committer | Andrew Tridgell <tridge@samba.org> | 2007-12-03 10:19:24 +1100 |
commit | 7edb41692eb8d32dfeadb08af61db744f39900c2 (patch) | |
tree | d8baa16c422f364bff2aa87f0c2032fbfe20b17f | |
parent | 330bf59ab1c14ca09bb4d60b9dbd02793debff1c (diff) | |
parent | 2f1baf34d39de59656bac37f3445bb6fc2b4b3d1 (diff) | |
download | samba-7edb41692eb8d32dfeadb08af61db744f39900c2.tar.gz samba-7edb41692eb8d32dfeadb08af61db744f39900c2.tar.xz samba-7edb41692eb8d32dfeadb08af61db744f39900c2.zip |
merge from ronnie
(This used to be ctdb commit 6653a0b67381310236e548e5fc0a9e27209b44e0)
-rw-r--r-- | ctdb/client/ctdb_client.c | 23 | ||||
-rw-r--r-- | ctdb/include/ctdb.h | 10 | ||||
-rw-r--r-- | ctdb/include/ctdb_private.h | 5 | ||||
-rw-r--r-- | ctdb/server/ctdb_control.c | 5 | ||||
-rw-r--r-- | ctdb/server/ctdb_monitor.c | 54 | ||||
-rw-r--r-- | ctdb/server/ctdb_recover.c | 7 | ||||
-rw-r--r-- | ctdb/server/ctdb_recoverd.c | 149 | ||||
-rw-r--r-- | ctdb/server/ctdb_server.c | 17 | ||||
-rw-r--r-- | ctdb/server/ctdb_takeover.c | 12 | ||||
-rw-r--r-- | ctdb/server/ctdbd.c | 18 | ||||
-rw-r--r-- | ctdb/tcp/tcp_connect.c | 26 | ||||
-rw-r--r-- | ctdb/tools/ctdb.c | 24 |
12 files changed, 237 insertions, 113 deletions
diff --git a/ctdb/client/ctdb_client.c b/ctdb/client/ctdb_client.c index 4d91e6d70c..1935f5b7b1 100644 --- a/ctdb/client/ctdb_client.c +++ b/ctdb/client/ctdb_client.c @@ -1932,29 +1932,6 @@ int ctdb_ctrl_getpnn(struct ctdb_context *ctdb, struct timeval timeout, uint32_t } /* - set the monitoring mode of a remote node - */ -int ctdb_ctrl_setmonmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t monmode) -{ - int ret; - TDB_DATA data; - int32_t res; - - data.dsize = sizeof(uint32_t); - data.dptr = (uint8_t *)&monmode; - - ret = ctdb_control(ctdb, destnode, 0, - CTDB_CONTROL_SET_MONMODE, 0, data, - NULL, NULL, &res, &timeout, NULL); - if (ret != 0 || res != 0) { - DEBUG(0,(__location__ " ctdb_control for setmonmode failed\n")); - return -1; - } - - return 0; -} - -/* get the monitoring mode of a remote node */ int ctdb_ctrl_getmonmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *monmode) diff --git a/ctdb/include/ctdb.h b/ctdb/include/ctdb.h index b3a65a36ab..e44706d848 100644 --- a/ctdb/include/ctdb.h +++ b/ctdb/include/ctdb.h @@ -160,6 +160,12 @@ int ctdb_set_socketname(struct ctdb_context *ctdb, const char *socketname); int ctdb_set_nlist(struct ctdb_context *ctdb, const char *nlist); /* + Check that a specific ip address exists in the node list and returns + the id for the node or -1 +*/ +int ctdb_ip_to_nodeid(struct ctdb_context *ctdb, const char *nodeip); + +/* start the ctdb protocol */ int ctdb_start(struct ctdb_context *ctdb); @@ -343,10 +349,6 @@ int ctdb_ctrl_setrecmode(struct ctdb_context *ctdb, struct timeval timeout, uint get the monitoring mode of a remote node */ int ctdb_ctrl_getmonmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *monmode); -/* - set the monitoringmode of a remote node - */ -int ctdb_ctrl_setmonmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t monmode); /* get the recovery master of a remote node diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index cb76bb0074..7f5ff2d1b2 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -367,6 +367,7 @@ struct ctdb_context { const char *default_public_interface; pid_t recoverd_pid; bool done_startup; + const char *node_ip; }; struct ctdb_db_context { @@ -451,7 +452,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0, CTDB_CONTROL_GET_PNN = 35, CTDB_CONTROL_SHUTDOWN = 36, CTDB_CONTROL_GET_MONMODE = 37, - CTDB_CONTROL_SET_MONMODE = 38, + /* #38 removed */ CTDB_CONTROL_MAX_RSN = 39, CTDB_CONTROL_SET_RSN_NONEMPTY = 40, CTDB_CONTROL_DELETE_LOW_RSN = 41, @@ -1044,6 +1045,8 @@ void ctdb_stop_recoverd(struct ctdb_context *ctdb); uint32_t ctdb_get_num_active_nodes(struct ctdb_context *ctdb); +void ctdb_disable_monitoring(struct ctdb_context *ctdb); +void ctdb_enable_monitoring(struct ctdb_context *ctdb); void ctdb_stop_monitoring(struct ctdb_context *ctdb); void ctdb_start_monitoring(struct ctdb_context *ctdb); void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb); diff --git a/ctdb/server/ctdb_control.c b/ctdb/server/ctdb_control.c index f13f39dbcc..35d2e155db 100644 --- a/ctdb/server/ctdb_control.c +++ b/ctdb/server/ctdb_control.c @@ -223,11 +223,6 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb, CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t)); return ctdb_control_set_recmode(ctdb, c, indata, async_reply, errormsg); - case CTDB_CONTROL_SET_MONMODE: - CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t)); - ctdb->monitoring_mode = *(uint32_t *)indata.dptr; - return 0; - case CTDB_CONTROL_GET_MONMODE: return ctdb->monitoring_mode; diff --git a/ctdb/server/ctdb_monitor.c b/ctdb/server/ctdb_monitor.c index 0e2dc29c6a..bdb3d45eda 100644 --- a/ctdb/server/ctdb_monitor.c +++ b/ctdb/server/ctdb_monitor.c @@ -33,13 +33,6 @@ static void ctdb_check_for_dead_nodes(struct event_context *ev, struct timed_eve struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context); int i; - if (ctdb->monitoring_mode == CTDB_MONITORING_DISABLED) { - event_add_timed(ctdb->ev, ctdb->monitor_context, - timeval_current_ofs(ctdb->tunable.keepalive_interval, 0), - ctdb_check_for_dead_nodes, ctdb); - return; - } - /* send a keepalive to all other nodes, unless */ for (i=0;i<ctdb->num_nodes;i++) { struct ctdb_node *node = ctdb->nodes[i]; @@ -118,8 +111,8 @@ static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p) } event_add_timed(ctdb->ev, ctdb->monitor_context, - timeval_current_ofs(next_interval, 0), - ctdb_check_health, ctdb); + timeval_current_ofs(next_interval, 0), + ctdb_check_health, ctdb); if (c.old_flags == node->flags) { return; @@ -155,7 +148,7 @@ static void ctdb_startup_callback(struct ctdb_context *ctdb, int status, void *p ctdb_check_health, ctdb); } else { event_add_timed(ctdb->ev, ctdb->monitor_context, - timeval_current_ofs(ctdb->tunable.monitor_interval, 0), + timeval_current_ofs(ctdb->tunable.monitor_interval, 0), ctdb_check_health, ctdb); } @@ -199,12 +192,35 @@ static void ctdb_check_health(struct event_context *ev, struct timed_event *te, } } -/* stop any monitoring */ +/* + (Temporaily) Disabling monitoring will stop the monitor event scripts + from running but node health checks will still occur +*/ +void ctdb_disable_monitoring(struct ctdb_context *ctdb) +{ + ctdb->monitoring_mode = CTDB_MONITORING_DISABLED; + DEBUG(2,("Monitoring has been disabled\n")); +} + +/* + Re-enable running monitor events after they have been disabled + */ +void ctdb_enable_monitoring(struct ctdb_context *ctdb) +{ + ctdb->monitoring_mode = CTDB_MONITORING_ACTIVE; + DEBUG(2,("Monitoring has been enabled\n")); +} + +/* stop any monitoring + this should only be done when shutting down the daemon +*/ void ctdb_stop_monitoring(struct ctdb_context *ctdb) { talloc_free(ctdb->monitor_context); - ctdb->monitor_context = talloc_new(ctdb); - CTDB_NO_MEMORY_FATAL(ctdb, ctdb->monitor_context); + ctdb->monitor_context = NULL; + + ctdb->monitoring_mode = CTDB_MONITORING_DISABLED; + DEBUG(0,("Monitoring has been stopped\n")); } /* @@ -214,8 +230,15 @@ void ctdb_start_monitoring(struct ctdb_context *ctdb) { struct timed_event *te; + if (ctdb->monitoring_mode == CTDB_MONITORING_ACTIVE) { + return; + } + ctdb_stop_monitoring(ctdb); + ctdb->monitor_context = talloc_new(ctdb); + CTDB_NO_MEMORY_FATAL(ctdb, ctdb->monitor_context); + te = event_add_timed(ctdb->ev, ctdb->monitor_context, timeval_current_ofs(ctdb->tunable.keepalive_interval, 0), ctdb_check_for_dead_nodes, ctdb); @@ -225,6 +248,9 @@ void ctdb_start_monitoring(struct ctdb_context *ctdb) timeval_current_ofs(ctdb->tunable.monitor_retry, 0), ctdb_check_health, ctdb); CTDB_NO_MEMORY_FATAL(ctdb, te); + + ctdb->monitoring_mode = CTDB_MONITORING_ACTIVE; + DEBUG(0,("Monitoring has been started\n")); } @@ -243,7 +269,7 @@ int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata) node->flags &= ~m->clear; if (node->flags == old_flags) { - /* no change */ + DEBUG(2, ("Control modflags on node %u - Unchanged - flags 0x%x\n", ctdb->pnn, node->flags)); return 0; } diff --git a/ctdb/server/ctdb_recover.c b/ctdb/server/ctdb_recover.c index 3721facdba..8b2dfb7583 100644 --- a/ctdb/server/ctdb_recover.c +++ b/ctdb/server/ctdb_recover.c @@ -415,7 +415,7 @@ static void ctdb_recovered_callback(struct ctdb_context *ctdb, int status, void { struct ctdb_set_recmode_state *state = talloc_get_type(p, struct ctdb_set_recmode_state); - ctdb_start_monitoring(ctdb); + ctdb_enable_monitoring(state->ctdb); if (status == 0) { ctdb->recovery_mode = state->recmode; @@ -484,7 +484,7 @@ static void set_recmode_handler(struct event_context *ev, struct fd_event *fde, } - ctdb_stop_monitoring(state->ctdb); + ctdb_disable_monitoring(state->ctdb); /* call the events script to tell all subsystems that we have recovered */ ret = ctdb_event_script_callback(state->ctdb, @@ -492,7 +492,10 @@ static void set_recmode_handler(struct event_context *ev, struct fd_event *fde, state, ctdb_recovered_callback, state, "recovered"); + if (ret != 0) { + ctdb_enable_monitoring(state->ctdb); + ctdb_request_control_reply(state->ctdb, state->c, NULL, -1, "failed to run eventscript from set_recmode"); talloc_free(state); return; diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index e54c53d935..8e297e9f52 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -61,12 +61,21 @@ static void ctdb_unban_node(struct ctdb_recoverd *rec, uint32_t pnn) { struct ctdb_context *ctdb = rec->ctdb; + DEBUG(0,("Unbanning node %u\n", pnn)); + if (!ctdb_validate_pnn(ctdb, pnn)) { DEBUG(0,("Bad pnn %u in ctdb_unban_node\n", pnn)); return; } + if (pnn == ctdb->pnn) { + /* make sure we remember we are no longer banned in case + there is an election */ + rec->node_flags &= ~NODE_FLAGS_BANNED; + } + if (rec->banned_nodes[pnn] == NULL) { + DEBUG(0,("No ban recorded for this node. ctdb_unban_node() request ignored\n")); return; } @@ -97,6 +106,8 @@ static void ctdb_ban_node(struct ctdb_recoverd *rec, uint32_t pnn, uint32_t ban_ { struct ctdb_context *ctdb = rec->ctdb; + DEBUG(0,("Banning node %u for %u seconds\n", pnn, ban_time)); + if (!ctdb_validate_pnn(ctdb, pnn)) { DEBUG(0,("Bad pnn %u in ctdb_ban_node\n", pnn)); return; @@ -111,10 +122,20 @@ static void ctdb_ban_node(struct ctdb_recoverd *rec, uint32_t pnn, uint32_t ban_ DEBUG(0,("self ban - lowering our election priority\n")); /* banning ourselves - lower our election priority */ rec->priority_time = timeval_current(); + + /* make sure we remember we are banned in case there is an + election */ + rec->node_flags |= NODE_FLAGS_BANNED; } ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), pnn, NODE_FLAGS_BANNED, 0); + if (rec->banned_nodes[pnn] != NULL) { + DEBUG(0,("Re-banning an already banned node. Remove previous ban and set a new ban.\n")); + talloc_free(rec->banned_nodes[pnn]); + rec->banned_nodes[pnn] = NULL; + } + rec->banned_nodes[pnn] = talloc(rec, struct ban_state); CTDB_NO_MEMORY_FATAL(ctdb, rec->banned_nodes[pnn]); @@ -739,13 +760,32 @@ static void ctdb_wait_election(struct ctdb_recoverd *rec) } } +/* + remember the trouble maker + */ +static void ctdb_set_culprit(struct ctdb_recoverd *rec, uint32_t culprit) +{ + struct ctdb_context *ctdb = rec->ctdb; + + if (rec->last_culprit != culprit || + timeval_elapsed(&rec->first_recover_time) > ctdb->tunable.recovery_grace_period) { + DEBUG(0,("New recovery culprit %u\n", culprit)); + /* either a new node is the culprit, or we've decided to forgive them */ + rec->last_culprit = culprit; + rec->first_recover_time = timeval_current(); + rec->culprit_counter = 0; + } + rec->culprit_counter++; +} /* - update our local flags from all remote connected nodes. + Update our local flags from all remote connected nodes. + This is only run when we are or we belive we are the recovery master */ -static int update_local_flags(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap) +static int update_local_flags(struct ctdb_recoverd *rec, struct ctdb_node_map *nodemap) { int j; + struct ctdb_context *ctdb = rec->ctdb; TALLOC_CTX *mem_ctx = talloc_new(ctdb); /* get the nodemap for all active remote nodes and verify @@ -767,19 +807,55 @@ static int update_local_flags(struct ctdb_context *ctdb, struct ctdb_node_map *n if (ret != 0) { DEBUG(0, (__location__ " Unable to get nodemap from remote node %u\n", nodemap->nodes[j].pnn)); + ctdb_set_culprit(rec, nodemap->nodes[j].pnn); talloc_free(mem_ctx); - return -1; + return MONITOR_FAILED; } if (nodemap->nodes[j].flags != remote_nodemap->nodes[j].flags) { + struct ctdb_node_flag_change c; + TDB_DATA data; + + /* We should tell our daemon about this so it + updates its flags or else we will log the same + message again in the next iteration of recovery. + Since we are the recovery master we can just as + well update the flags on all nodes. + */ + c.pnn = nodemap->nodes[j].pnn; + c.old_flags = nodemap->nodes[j].flags; + c.new_flags = remote_nodemap->nodes[j].flags; + + data.dptr = (uint8_t *)&c; + data.dsize = sizeof(c); + + ctdb_send_message(ctdb, CTDB_BROADCAST_CONNECTED, + CTDB_SRVID_NODE_FLAGS_CHANGED, + data); + + /* Update our local copy of the flags in the recovery + daemon. + */ DEBUG(0,("Remote node %u had flags 0x%x, local had 0x%x - updating local\n", - nodemap->nodes[j].pnn, nodemap->nodes[j].flags, - remote_nodemap->nodes[j].flags)); + nodemap->nodes[j].pnn, remote_nodemap->nodes[j].flags, + nodemap->nodes[j].flags)); nodemap->nodes[j].flags = remote_nodemap->nodes[j].flags; + + /* If the BANNED flag has changed for the node + this is a good reason to do a new election. + */ + if ((c.old_flags ^ c.new_flags) & NODE_FLAGS_BANNED) { + DEBUG(0,("Remote node %u had different BANNED flags 0x%x, local had 0x%x - trigger a re-election\n", + nodemap->nodes[j].pnn, c.new_flags, + c.old_flags)); + talloc_free(mem_ctx); + return MONITOR_ELECTION_NEEDED; + } + } talloc_free(remote_nodemap); } talloc_free(mem_ctx); - return 0; + return MONITOR_OK; } @@ -801,23 +877,6 @@ static uint32_t new_generation(void) return generation; } -/* - remember the trouble maker - */ -static void ctdb_set_culprit(struct ctdb_recoverd *rec, uint32_t culprit) -{ - struct ctdb_context *ctdb = rec->ctdb; - - if (rec->last_culprit != culprit || - timeval_elapsed(&rec->first_recover_time) > ctdb->tunable.recovery_grace_period) { - DEBUG(0,("New recovery culprit %u\n", culprit)); - /* either a new node is the culprit, or we've decide to forgive them */ - rec->last_culprit = culprit; - rec->first_recover_time = timeval_current(); - rec->culprit_counter = 0; - } - rec->culprit_counter++; -} /* we are the recmaster, and recovery is needed - start a recovery run @@ -1615,6 +1674,18 @@ again: goto again; } + + /* We must check if we need to ban a node here but we want to do this + as early as possible so we dont wait until we have pulled the node + map from the local node. thats why we have the hardcoded value 20 + */ + if (rec->culprit_counter > 20) { + DEBUG(0,("Node %u has caused %u failures in %.0f seconds - banning it for %u seconds\n", + rec->last_culprit, rec->culprit_counter, timeval_elapsed(&rec->first_recover_time), + ctdb->tunable.recovery_ban_period)); + ctdb_ban_node(rec, rec->last_culprit, ctdb->tunable.recovery_ban_period); + } + /* get relevant tunables */ ret = ctdb_ctrl_get_all_tunables(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, &ctdb->tunable); if (ret != 0) { @@ -1643,6 +1714,29 @@ again: goto again; } + /* check that we (recovery daemon) and the local ctdb daemon + agrees on whether we are banned or not + */ + if (nodemap->nodes[pnn].flags & NODE_FLAGS_BANNED) { + if (rec->banned_nodes[pnn] == NULL) { + DEBUG(0,("Local ctdb daemon thinks this node is BANNED but the recovery master disagrees. Re-banning the node\n")); + + ctdb_ban_node(rec, pnn, ctdb->tunable.recovery_ban_period); + ctdb_set_culprit(rec, pnn); + + goto again; + } + } else { + if (rec->banned_nodes[pnn] != NULL) { + DEBUG(0,("Local ctdb daemon does not think this node is BANNED but the recovery master disagrees. Re-banning the node\n")); + + ctdb_ban_node(rec, pnn, ctdb->tunable.recovery_ban_period); + ctdb_set_culprit(rec, pnn); + + goto again; + } + } + /* remember our own node flags */ rec->node_flags = nodemap->nodes[pnn].flags; @@ -1764,8 +1858,13 @@ again: /* ensure our local copies of flags are right */ - ret = update_local_flags(ctdb, nodemap); - if (ret != 0) { + ret = update_local_flags(rec, nodemap); + if (ret == MONITOR_ELECTION_NEEDED) { + DEBUG(0,("update_local_flags() called for a re-election.\n")); + force_election(rec, mem_ctx, pnn, nodemap); + goto again; + } + if (ret != MONITOR_OK) { DEBUG(0,("Unable to update local flags\n")); goto again; } diff --git a/ctdb/server/ctdb_server.c b/ctdb/server/ctdb_server.c index dddf90753b..2a80798dd9 100644 --- a/ctdb/server/ctdb_server.c +++ b/ctdb/server/ctdb_server.c @@ -35,6 +35,23 @@ int ctdb_set_transport(struct ctdb_context *ctdb, const char *transport) } /* + Check whether an ip is a valid node ip + Returns the node id for this ip address or -1 +*/ +int ctdb_ip_to_nodeid(struct ctdb_context *ctdb, const char *nodeip) +{ + int nodeid; + + for (nodeid=0;nodeid<ctdb->num_nodes;nodeid++) { + if (!strcmp(ctdb->nodes[nodeid]->address.address, nodeip)) { + return nodeid; + } + } + + return -1; +} + +/* choose the recovery lock file */ int ctdb_set_recovery_lock_file(struct ctdb_context *ctdb, const char *file) diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index ec3455e4c0..a452da6424 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -131,7 +131,7 @@ static void takeover_ip_callback(struct ctdb_context *ctdb, int status, char *ip = inet_ntoa(state->sin->sin_addr); struct ctdb_tcp_array *tcparray; - ctdb_start_monitoring(ctdb); + ctdb_enable_monitoring(ctdb); if (status != 0) { DEBUG(0,(__location__ " Failed to takeover IP %s on interface %s\n", @@ -238,7 +238,7 @@ int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, inet_ntoa(pip->sin.sin_addr), vnn->public_netmask_bits, vnn->iface)); - ctdb_stop_monitoring(ctdb); + ctdb_disable_monitoring(ctdb); ret = ctdb_event_script_callback(ctdb, timeval_current_ofs(ctdb->tunable.script_timeout, 0), @@ -247,7 +247,9 @@ int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, vnn->iface, inet_ntoa(pip->sin.sin_addr), vnn->public_netmask_bits); + if (ret != 0) { + ctdb_enable_monitoring(ctdb); DEBUG(0,(__location__ " Failed to takeover IP %s on interface %s\n", inet_ntoa(pip->sin.sin_addr), vnn->iface)); talloc_free(state); @@ -299,7 +301,7 @@ static void release_ip_callback(struct ctdb_context *ctdb, int status, char *ip = inet_ntoa(state->sin->sin_addr); TDB_DATA data; - ctdb_start_monitoring(ctdb); + ctdb_enable_monitoring(ctdb); /* send a message to all clients of this node telling them that the cluster has been reconfigured and they should @@ -364,7 +366,7 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, state->vnn = vnn; - ctdb_stop_monitoring(ctdb); + ctdb_disable_monitoring(ctdb); ret = ctdb_event_script_callback(ctdb, timeval_current_ofs(ctdb->tunable.script_timeout, 0), @@ -374,6 +376,8 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, inet_ntoa(pip->sin.sin_addr), vnn->public_netmask_bits); if (ret != 0) { + ctdb_enable_monitoring(ctdb); + DEBUG(0,(__location__ " Failed to release IP %s on interface %s\n", inet_ntoa(pip->sin.sin_addr), vnn->iface)); talloc_free(state); diff --git a/ctdb/server/ctdbd.c b/ctdb/server/ctdbd.c index eaf79a0ed7..5613ea91b7 100644 --- a/ctdb/server/ctdbd.c +++ b/ctdb/server/ctdbd.c @@ -50,6 +50,7 @@ static struct { const char *db_dir_persistent; const char *public_interface; const char *single_public_ip; + const char *node_ip; int no_setsched; } options = { .nlist = ETCDIR "/ctdb/nodes", @@ -110,6 +111,7 @@ int main(int argc, const char *argv[]) { "event-script-dir", 0, POPT_ARG_STRING, &options.event_script_dir, 0, "event script directory", "dirname" }, { "logfile", 0, POPT_ARG_STRING, &options.logfile, 0, "log file location", "filename" }, { "nlist", 0, POPT_ARG_STRING, &options.nlist, 0, "node list file", "filename" }, + { "node-ip", 0, POPT_ARG_STRING, &options.node_ip, 0, "node ip", "ip-address"}, { "listen", 0, POPT_ARG_STRING, &options.myaddress, 0, "address to listen on", "address" }, { "transport", 0, POPT_ARG_STRING, &options.transport, 0, "protocol transport", NULL }, { "dbdir", 0, POPT_ARG_STRING, &options.db_dir, 0, "directory for the tdb files", NULL }, @@ -166,7 +168,7 @@ int main(int argc, const char *argv[]) ctdb->upcalls = &ctdb_upcalls; ctdb->idr = idr_init(ctdb); ctdb->recovery_lock_fd = -1; - ctdb->monitoring_mode = CTDB_MONITORING_ACTIVE; + ctdb->monitoring_mode = CTDB_MONITORING_DISABLED; ctdb_tunables_set_defaults(ctdb); @@ -198,6 +200,20 @@ int main(int argc, const char *argv[]) exit(1); } + /* if a node-ip was specified, verify that it exists in the + nodes file + */ + if (options.node_ip != NULL) { + DEBUG(0,("IP for this node is %s\n", options.node_ip)); + ret = ctdb_ip_to_nodeid(ctdb, options.node_ip); + if (ret == -1) { + DEBUG(0,("The specified node-ip:%s is not a valid node address. Exiting.\n", options.node_ip)); + exit(1); + } + ctdb->node_ip = options.node_ip; + DEBUG(0,("This is node %d\n", ret)); + } + if (options.db_dir) { ret = ctdb_set_tdb_dir(ctdb, options.db_dir); if (ret == -1) { diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c index 3548f82ed7..3c4e7bfb10 100644 --- a/ctdb/tcp/tcp_connect.c +++ b/ctdb/tcp/tcp_connect.c @@ -214,13 +214,9 @@ static void ctdb_listen_event(struct event_context *ev, struct fd_event *fde, if (fd == -1) return; incoming_node = inet_ntoa(addr.sin_addr); - for (nodeid=0;nodeid<ctdb->num_nodes;nodeid++) { - if (!strcmp(incoming_node, ctdb->nodes[nodeid]->address.address)) { - DEBUG(0, ("Incoming connection from node:%d %s\n",nodeid,incoming_node)); - break; - } - } - if (nodeid>=ctdb->num_nodes) { + nodeid = ctdb_ip_to_nodeid(ctdb, incoming_node); + + if (nodeid == -1) { DEBUG(0, ("Refused connection from unknown node %s\n", incoming_node)); close(fd); return; @@ -275,17 +271,27 @@ static int ctdb_tcp_listen_automatic(struct ctdb_context *ctdb) } for (i=0;i<ctdb->num_nodes;i++) { + /* if node_ip is specified we will only try to bind to that + ip. + */ + if (ctdb->node_ip != NULL) { + if (strcmp(ctdb->node_ip, ctdb->nodes[i]->address.address)) { + continue; + } + } + ZERO_STRUCT(sock); #ifdef HAVE_SOCK_SIN_LEN sock.sin_len = sizeof(sock); #endif sock.sin_port = htons(ctdb->nodes[i]->address.port); sock.sin_family = PF_INET; - if (ctdb_tcp_get_address(ctdb, ctdb->nodes[i]->address.address, - &sock.sin_addr) != 0) { + if (ctdb_tcp_get_address(ctdb, + ctdb->nodes[i]->address.address, + &sock.sin_addr) != 0) { continue; } - + if (bind(ctcp->listen_fd, (struct sockaddr * )&sock, sizeof(sock)) == 0) { break; diff --git a/ctdb/tools/ctdb.c b/ctdb/tools/ctdb.c index e0c6621429..0a4f370697 100644 --- a/ctdb/tools/ctdb.c +++ b/ctdb/tools/ctdb.c @@ -754,29 +754,6 @@ static int control_getmonmode(struct ctdb_context *ctdb, int argc, const char ** } /* - set the monitoring mode of a remote node - */ -static int control_setmonmode(struct ctdb_context *ctdb, int argc, const char **argv) -{ - uint32_t monmode; - int ret; - - if (argc < 1) { - usage(); - } - - monmode = strtoul(argv[0], NULL, 0); - - ret = ctdb_ctrl_setmonmode(ctdb, TIMELIMIT(), options.pnn, monmode); - if (ret != 0) { - DEBUG(0, ("Unable to set monmode on node %u\n", options.pnn)); - return ret; - } - - return 0; -} - -/* display remote list of keys/data for a db */ static int control_catdb(struct ctdb_context *ctdb, int argc, const char **argv) @@ -1082,7 +1059,6 @@ static const struct { { "getdbmap", control_getdbmap, true, "show the database map" }, { "catdb", control_catdb, true, "dump a database" , "<dbname>"}, { "getmonmode", control_getmonmode, true, "show monitoring mode" }, - { "setmonmode", control_setmonmode, true, "set monitoring mode", "<0|1>" }, { "setdebug", control_setdebug, true, "set debug level", "<debuglevel>" }, { "getdebug", control_getdebug, true, "get debug level" }, { "attach", control_attach, true, "attach to a database", "<dbname>" }, |