diff options
| author | Ronnie Sahlberg <ronniesahlberg@gmail.com> | 2009-06-01 14:18:34 +1000 |
|---|---|---|
| committer | Ronnie Sahlberg <ronniesahlberg@gmail.com> | 2009-06-01 14:18:34 +1000 |
| commit | e6170b53894ce188a574c9359e2b8b2797e4e2d1 (patch) | |
| tree | 28c61aa02c6d1e5fdbddfdb4774bcd54c3ab5d11 /ctdb | |
| parent | 42591560506c7fee977975e261e4790ae1a0f97f (diff) | |
| download | samba-e6170b53894ce188a574c9359e2b8b2797e4e2d1.tar.gz samba-e6170b53894ce188a574c9359e2b8b2797e4e2d1.tar.xz samba-e6170b53894ce188a574c9359e2b8b2797e4e2d1.zip | |
add a new node state : DELETED.
This state marks a node as DELETED internally in ctdb
so that the remaining nodes are not renumbered when a node is removed from the nodes file.
This makes it possible to run "ctdb reloadnodes" at runtime without
causing nodes to be renumbered.
To do this, comment the node out in the nodes file instead of deleting its line, for example:
1.0.0.1
#1.0.0.2
1.0.0.3
After removing 1.0.0.2 from the cluster, the remaining nodes retain the
pnns they had before the deletion, namely 0 and 2.
Any commented-out line in the nodes file represents a DELETED pnn.
(This used to be ctdb commit 6a5e4fd7fa391206b463bb4e976502f3ac5bd343)
Diffstat (limited to 'ctdb')
| -rw-r--r-- | ctdb/include/ctdb.h | 5 | ||||
| -rw-r--r-- | ctdb/include/ctdb_private.h | 3 | ||||
| -rw-r--r-- | ctdb/server/ctdb_keepalive.c | 5 | ||||
| -rw-r--r-- | ctdb/server/ctdb_recover.c | 7 | ||||
| -rw-r--r-- | ctdb/server/ctdb_recoverd.c | 32 | ||||
| -rw-r--r-- | ctdb/server/ctdb_server.c | 65 | ||||
| -rw-r--r-- | ctdb/server/ctdb_takeover.c | 4 | ||||
| -rw-r--r-- | ctdb/tcp/tcp_connect.c | 6 | ||||
| -rw-r--r-- | ctdb/tcp/tcp_init.c | 10 | ||||
| -rw-r--r-- | ctdb/tools/ctdb.c | 1 |
10 files changed, 121 insertions, 17 deletions
diff --git a/ctdb/include/ctdb.h b/ctdb/include/ctdb.h index 866ba76e2a..ea4bcae109 100644 --- a/ctdb/include/ctdb.h +++ b/ctdb/include/ctdb.h @@ -101,6 +101,11 @@ struct ctdb_call_info { */ #define CTDB_SRVID_PUSH_NODE_FLAGS 0xF900000000000000LL +/* + a message ID to get the recovery daemon to reload the nodes file + */ +#define CTDB_SRVID_RELOAD_NODES 0xFA00000000000000LL + /* used on the domain socket, send a pdu to the local daemon */ diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index ff007a8ac6..98dab0785d 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -198,7 +198,8 @@ struct ctdb_node { #define NODE_FLAGS_PERMANENTLY_DISABLED 0x00000004 /* administrator has disabled node */ #define NODE_FLAGS_BANNED 0x00000008 /* recovery daemon has banned the node */ #define NODE_FLAGS_DISABLED (NODE_FLAGS_UNHEALTHY|NODE_FLAGS_PERMANENTLY_DISABLED) -#define NODE_FLAGS_INACTIVE (NODE_FLAGS_DISCONNECTED|NODE_FLAGS_BANNED) +#define NODE_FLAGS_DELETED 0x00000010 /* this node has been deleted */ +#define NODE_FLAGS_INACTIVE (NODE_FLAGS_DELETED|NODE_FLAGS_DISCONNECTED|NODE_FLAGS_BANNED) uint32_t flags; /* used by the dead node monitoring */ diff --git a/ctdb/server/ctdb_keepalive.c b/ctdb/server/ctdb_keepalive.c index 524feb1696..dfe7cfc622 100644 --- a/ctdb/server/ctdb_keepalive.c +++ b/ctdb/server/ctdb_keepalive.c @@ -37,6 +37,11 @@ static void ctdb_check_for_dead_nodes(struct event_context *ev, struct timed_eve /* send a keepalive to all other nodes, unless */ for (i=0;i<ctdb->num_nodes;i++) { struct ctdb_node *node = ctdb->nodes[i]; + + if (node->flags & NODE_FLAGS_DELETED) { + continue; + } + if (node->pnn == ctdb->pnn) { continue; } diff --git a/ctdb/server/ctdb_recover.c b/ctdb/server/ctdb_recover.c index b4428fa494..7953c6b447 100644 --- a/ctdb/server/ctdb_recover.c +++ b/ctdb/server/ctdb_recover.c @@ -242,6 +242,10 @@ ctdb_reload_nodes_event(struct event_context *ev, struct timed_event *te, continue; } + if 
(ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) { + continue; + } + /* any new or different nodes must be added */ if (ctdb->methods->add_node(ctdb->nodes[i]) != 0) { DEBUG(DEBUG_CRIT, (__location__ " methods->add_node failed at %d\n", i)); @@ -253,6 +257,9 @@ ctdb_reload_nodes_event(struct event_context *ev, struct timed_event *te, } } + /* tell the recovery daemon to reaload the nodes file too */ + ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELOAD_NODES, tdb_null); + talloc_free(tmp_ctx); return; } diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index 995284fbb2..6b2fb5e555 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -1332,12 +1332,6 @@ static int do_recovery(struct ctdb_recoverd *rec, DEBUG(DEBUG_NOTICE, (__location__ " Starting do_recovery\n")); - if (ctdb->num_nodes != nodemap->num) { - DEBUG(DEBUG_ERR, (__location__ " ctdb->num_nodes (%d) != nodemap->num (%d) reloading nodes file\n", ctdb->num_nodes, nodemap->num)); - reload_nodes_file(ctdb); - return -1; - } - /* if recovery fails, force it again */ rec->need_recovery = true; @@ -1804,6 +1798,21 @@ DEBUG(DEBUG_ERR, ("recovery master memory dump\n")); } /* + handler for reload_nodes +*/ +static void reload_nodes_handler(struct ctdb_context *ctdb, uint64_t srvid, + TDB_DATA data, void *private_data) +{ + struct ctdb_recoverd *rec = talloc_get_type(private_data, struct ctdb_recoverd); + + DEBUG(DEBUG_ERR, (__location__ " Reload nodes file from recovery daemon\n")); + + reload_nodes_file(rec->ctdb); +} + + + +/* handler for recovery master elections */ static void election_handler(struct ctdb_context *ctdb, uint64_t srvid, @@ -2371,6 +2380,9 @@ static void monitor_cluster(struct ctdb_context *ctdb) /* register a message port for vacuum fetch */ ctdb_set_message_handler(ctdb, CTDB_SRVID_VACUUM_FETCH, vacuum_fetch_handler, rec); + /* register a message port for reloadnodes */ + ctdb_set_message_handler(ctdb, CTDB_SRVID_RELOAD_NODES, 
reload_nodes_handler, rec); + again: if (mem_ctx) { talloc_free(mem_ctx); @@ -2591,14 +2603,16 @@ again: goto again; } for (j=0; j<nodemap->num; j++) { - if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) { - continue; - } /* release any existing data */ if (ctdb->nodes[j]->public_ips) { talloc_free(ctdb->nodes[j]->public_ips); ctdb->nodes[j]->public_ips = NULL; } + + if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) { + continue; + } + /* grab a new shiny list of public ips from the node */ if (ctdb_ctrl_get_public_ips(ctdb, CONTROL_TIMEOUT(), ctdb->nodes[j]->pnn, diff --git a/ctdb/server/ctdb_server.c b/ctdb/server/ctdb_server.c index bd1d7ed621..1eaeae2367 100644 --- a/ctdb/server/ctdb_server.c +++ b/ctdb/server/ctdb_server.c @@ -45,6 +45,9 @@ int ctdb_ip_to_nodeid(struct ctdb_context *ctdb, const char *nodeip) int nodeid; for (nodeid=0;nodeid<ctdb->num_nodes;nodeid++) { + if (ctdb->nodes[nodeid]->flags & NODE_FLAGS_DELETED) { + continue; + } if (!strcmp(ctdb->nodes[nodeid]->address.address, nodeip)) { return nodeid; } @@ -89,7 +92,7 @@ int ctdb_set_tdb_dir_persistent(struct ctdb_context *ctdb, const char *dir) } /* - add a node to the list of active nodes + add a node to the list of nodes */ static int ctdb_add_node(struct ctdb_context *ctdb, char *nstr) { @@ -137,6 +140,46 @@ static int ctdb_add_node(struct ctdb_context *ctdb, char *nstr) } /* + add an entry for a "deleted" node to the list of nodes. + a "deleted" node is a node that is commented out from the nodes file. + this is used to prevent that subsequent nodes in the nodes list + change their pnn value if a node is "delete" by commenting it out and then + using "ctdb reloadnodes" at runtime. 
+*/ +static int ctdb_add_deleted_node(struct ctdb_context *ctdb) +{ + struct ctdb_node *node, **nodep; + + nodep = talloc_realloc(ctdb, ctdb->nodes, struct ctdb_node *, ctdb->num_nodes+1); + CTDB_NO_MEMORY(ctdb, nodep); + + ctdb->nodes = nodep; + nodep = &ctdb->nodes[ctdb->num_nodes]; + (*nodep) = talloc_zero(ctdb->nodes, struct ctdb_node); + CTDB_NO_MEMORY(ctdb, *nodep); + node = *nodep; + + if (ctdb_parse_address(ctdb, node, "0.0.0.0", &node->address) != 0) { + DEBUG(DEBUG_ERR,("Failed to setup deleted node %d\n", ctdb->num_nodes)); + return -1; + } + node->ctdb = ctdb; + node->name = talloc_strdup(node, "0.0.0.0:0"); + + /* this assumes that the nodes are kept in sorted order, and no gaps */ + node->pnn = ctdb->num_nodes; + + /* this node is permanently deleted/disconnected */ + node->flags = NODE_FLAGS_DELETED|NODE_FLAGS_DISCONNECTED; + + ctdb->num_nodes++; + node->dead_count = 0; + + return 0; +} + + +/* setup the node list from a file */ int ctdb_set_nlist(struct ctdb_context *ctdb, const char *nlist) @@ -167,6 +210,10 @@ int ctdb_set_nlist(struct ctdb_context *ctdb, const char *nlist) node++; } if (*node == '#') { + if (ctdb_add_deleted_node(ctdb) != 0) { + talloc_free(lines); + return -1; + } continue; } if (strcmp(node, "") == 0) { @@ -188,7 +235,11 @@ int ctdb_set_nlist(struct ctdb_context *ctdb, const char *nlist) CTDB_NO_MEMORY(ctdb, ctdb->vnn_map->map); for(i=0;i<ctdb->vnn_map->size;i++) { - ctdb->vnn_map->map[i] = i; + if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) { + ctdb->vnn_map->map[i] = -1; + } else { + ctdb->vnn_map->map[i] = i; + } } talloc_free(lines); @@ -437,7 +488,10 @@ static void ctdb_broadcast_packet_all(struct ctdb_context *ctdb, struct ctdb_req_header *hdr) { int i; - for (i=0;i<ctdb->num_nodes;i++) { + for (i=0; i < ctdb->num_nodes; i++) { + if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) { + continue; + } hdr->destnode = ctdb->nodes[i]->pnn; ctdb_queue_packet(ctdb, hdr); } @@ -463,7 +517,10 @@ static void 
ctdb_broadcast_packet_connected(struct ctdb_context *ctdb, struct ctdb_req_header *hdr) { int i; - for (i=0;i<ctdb->num_nodes;i++) { + for (i=0; i < ctdb->num_nodes; i++) { + if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) { + continue; + } if (!(ctdb->nodes[i]->flags & NODE_FLAGS_DISCONNECTED)) { hdr->destnode = ctdb->nodes[i]->pnn; ctdb_queue_packet(ctdb, hdr); diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index 9eac660e8d..21f7dc8489 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -673,6 +673,10 @@ create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx) for (i=0;i<ctdb->num_nodes;i++) { public_ips = ctdb->nodes[i]->public_ips; + if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) { + continue; + } + /* there were no public ips for this node */ if (public_ips == NULL) { continue; diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c index 9d28d48a1f..fc169e70b7 100644 --- a/ctdb/tcp/tcp_connect.c +++ b/ctdb/tcp/tcp_connect.c @@ -296,7 +296,11 @@ static int ctdb_tcp_listen_automatic(struct ctdb_context *ctdb) return -1; } - for (i=0;i<ctdb->num_nodes;i++) { + for (i=0; i < ctdb->num_nodes; i++) { + if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) { + continue; + } + /* if node_ip is specified we will only try to bind to that ip. 
*/ diff --git a/ctdb/tcp/tcp_init.c b/ctdb/tcp/tcp_init.c index c0606f0ec5..58ed6c8a28 100644 --- a/ctdb/tcp/tcp_init.c +++ b/ctdb/tcp/tcp_init.c @@ -69,7 +69,10 @@ static int ctdb_tcp_initialise(struct ctdb_context *ctdb) exit(1); } - for (i=0; i<ctdb->num_nodes; i++) { + for (i=0; i < ctdb->num_nodes; i++) { + if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) { + continue; + } if (ctdb_tcp_add_node(ctdb->nodes[i]) != 0) { DEBUG(DEBUG_CRIT, ("methods->add_node failed at %d\n", i)); return -1; @@ -135,7 +138,10 @@ static int ctdb_tcp_start(struct ctdb_context *ctdb) { int i; - for (i=0; i<ctdb->num_nodes; i++) { + for (i=0; i < ctdb->num_nodes; i++) { + if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) { + continue; + } ctdb_tcp_connect_node(ctdb->nodes[i]); } diff --git a/ctdb/tools/ctdb.c b/ctdb/tools/ctdb.c index 57e1a8dffe..fa4c5f9c27 100644 --- a/ctdb/tools/ctdb.c +++ b/ctdb/tools/ctdb.c @@ -492,6 +492,7 @@ static int control_status(struct ctdb_context *ctdb, int argc, const char **argv { NODE_FLAGS_PERMANENTLY_DISABLED, "DISABLED" }, { NODE_FLAGS_BANNED, "BANNED" }, { NODE_FLAGS_UNHEALTHY, "UNHEALTHY" }, + { NODE_FLAGS_DELETED, "DELETED" }, }; char *flags_str = NULL; int j; |
