diff options
author | Stefan Metzmacher <metze@samba.org> | 2013-11-13 14:17:32 +0100 |
---|---|---|
committer | Michael Adam <obnox@samba.org> | 2013-11-13 14:18:52 +0100 |
commit | f087a8e2b81fae82fa571ef09d2d1cb682cc8ff8 (patch) | |
tree | d4a6b14a1594958150b733180d995b5c32025017 /ctdb/server/ctdb_server.c | |
parent | e80a5aba3db8e81173fa443991e08ef4a300ea5c (diff) | |
parent | 25f3c8b5269863aadbe72e304da7012782ef5b25 (diff) | |
download | samba-f087a8e2b81fae82fa571ef09d2d1cb682cc8ff8.tar.gz samba-f087a8e2b81fae82fa571ef09d2d1cb682cc8ff8.tar.xz samba-f087a8e2b81fae82fa571ef09d2d1cb682cc8ff8.zip |
Merge branch 'master' of ctdb into 'master' of samba
Signed-off-by: Stefan Metzmacher <metze@samba.org>
Signed-off-by: Michael Adam <obnox@samba.org>
Diffstat (limited to 'ctdb/server/ctdb_server.c')
-rw-r--r-- | ctdb/server/ctdb_server.c | 690 |
1 files changed, 690 insertions, 0 deletions
diff --git a/ctdb/server/ctdb_server.c b/ctdb/server/ctdb_server.c new file mode 100644 index 0000000000..c45f4cb5ff --- /dev/null +++ b/ctdb/server/ctdb_server.c @@ -0,0 +1,690 @@ +/* + ctdb main protocol code + + Copyright (C) Andrew Tridgell 2006 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include "includes.h" +#include "tdb.h" +#include "lib/util/dlinklist.h" +#include "system/network.h" +#include "system/filesys.h" +#include "../include/ctdb_private.h" + +/* + choose the transport we will use +*/ +int ctdb_set_transport(struct ctdb_context *ctdb, const char *transport) +{ + ctdb->transport = talloc_strdup(ctdb, transport); + CTDB_NO_MEMORY(ctdb, ctdb->transport); + + return 0; +} + +/* + Check whether an ip is a valid node ip + Returns the node id for this ip address or -1 +*/ +int ctdb_ip_to_nodeid(struct ctdb_context *ctdb, const char *nodeip) +{ + int nodeid; + + for (nodeid=0;nodeid<ctdb->num_nodes;nodeid++) { + if (ctdb->nodes[nodeid]->flags & NODE_FLAGS_DELETED) { + continue; + } + if (!strcmp(ctdb->nodes[nodeid]->address.address, nodeip)) { + return nodeid; + } + } + + return -1; +} + +/* + choose the recovery lock file +*/ +int ctdb_set_recovery_lock_file(struct ctdb_context *ctdb, const char *file) +{ + if (ctdb->recovery_lock_file != NULL) { + talloc_free(ctdb->recovery_lock_file); + ctdb->recovery_lock_file = NULL; + } + + if (file == NULL) { + DEBUG(DEBUG_ALERT,("Recovery lock file set to \"\". Disabling recovery lock checking\n")); + ctdb->tunable.verify_recovery_lock = 0; + return 0; + } + + ctdb->recovery_lock_file = talloc_strdup(ctdb, file); + CTDB_NO_MEMORY(ctdb, ctdb->recovery_lock_file); + + return 0; +} + +/* + add a node to the list of nodes +*/ +static int ctdb_add_node(struct ctdb_context *ctdb, char *nstr) +{ + struct ctdb_node *node, **nodep; + + nodep = talloc_realloc(ctdb, ctdb->nodes, struct ctdb_node *, ctdb->num_nodes+1); + CTDB_NO_MEMORY(ctdb, nodep); + + ctdb->nodes = nodep; + nodep = &ctdb->nodes[ctdb->num_nodes]; + (*nodep) = talloc_zero(ctdb->nodes, struct ctdb_node); + CTDB_NO_MEMORY(ctdb, *nodep); + node = *nodep; + + if (ctdb_parse_address(ctdb, node, nstr, &node->address) != 0) { + return -1; + } + node->ctdb = ctdb; + node->name = talloc_asprintf(node, "%s:%u", + node->address.address, + node->address.port); + /* this assumes that the nodes are kept in sorted order, and no gaps */ + node->pnn = ctdb->num_nodes; + + /* nodes start out disconnected and unhealthy */ + node->flags = (NODE_FLAGS_DISCONNECTED | NODE_FLAGS_UNHEALTHY); + + if (ctdb->address.address && + ctdb_same_address(&ctdb->address, &node->address)) { + /* for automatic binding to interfaces, see tcp_connect.c */ + ctdb->pnn = node->pnn; + } + + ctdb->num_nodes++; + node->dead_count = 0; + + return 0; +} + +/* + add an entry for a "deleted" node to the list of nodes. + a "deleted" node is a node that is commented out from the nodes file. + this is used to prevent that subsequent nodes in the nodes list + change their pnn value if a node is "delete" by commenting it out and then + using "ctdb reloadnodes" at runtime. +*/ +static int ctdb_add_deleted_node(struct ctdb_context *ctdb) +{ + struct ctdb_node *node, **nodep; + + nodep = talloc_realloc(ctdb, ctdb->nodes, struct ctdb_node *, ctdb->num_nodes+1); + CTDB_NO_MEMORY(ctdb, nodep); + + ctdb->nodes = nodep; + nodep = &ctdb->nodes[ctdb->num_nodes]; + (*nodep) = talloc_zero(ctdb->nodes, struct ctdb_node); + CTDB_NO_MEMORY(ctdb, *nodep); + node = *nodep; + + if (ctdb_parse_address(ctdb, node, "0.0.0.0", &node->address) != 0) { + DEBUG(DEBUG_ERR,("Failed to setup deleted node %d\n", ctdb->num_nodes)); + return -1; + } + node->ctdb = ctdb; + node->name = talloc_strdup(node, "0.0.0.0:0"); + + /* this assumes that the nodes are kept in sorted order, and no gaps */ + node->pnn = ctdb->num_nodes; + + /* this node is permanently deleted/disconnected */ + node->flags = NODE_FLAGS_DELETED|NODE_FLAGS_DISCONNECTED; + + ctdb->num_nodes++; + node->dead_count = 0; + + return 0; +} + + +/* + setup the node list from a file +*/ +static int ctdb_set_nlist(struct ctdb_context *ctdb, const char *nlist) +{ + char **lines; + int nlines; + int i, j, num_present; + + talloc_free(ctdb->nodes); + ctdb->nodes = NULL; + ctdb->num_nodes = 0; + + lines = file_lines_load(nlist, &nlines, ctdb); + if (lines == NULL) { + ctdb_set_error(ctdb, "Failed to load nlist '%s'\n", nlist); + return -1; + } + while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) { + nlines--; + } + + num_present = 0; + for (i=0; i < nlines; i++) { + char *node; + + node = lines[i]; + /* strip leading spaces */ + while((*node == ' ') || (*node == '\t')) { + node++; + } + if (*node == '#') { + if (ctdb_add_deleted_node(ctdb) != 0) { + talloc_free(lines); + return -1; + } + continue; + } + if (strcmp(node, "") == 0) { + continue; + } + if (ctdb_add_node(ctdb, node) != 0) { + talloc_free(lines); + return -1; + } + num_present++; + } + + /* initialize the vnn mapping table now that we have the nodes list, + skipping any deleted nodes + */ + ctdb->vnn_map = talloc(ctdb, struct ctdb_vnn_map); + CTDB_NO_MEMORY(ctdb, ctdb->vnn_map); + + ctdb->vnn_map->generation = INVALID_GENERATION; + ctdb->vnn_map->size = num_present; + ctdb->vnn_map->map = talloc_array(ctdb->vnn_map, uint32_t, ctdb->vnn_map->size); + CTDB_NO_MEMORY(ctdb, ctdb->vnn_map->map); + + for(i=0, j=0; i < ctdb->vnn_map->size; i++) { + if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) { + continue; + } + ctdb->vnn_map->map[j] = i; + j++; + } + + talloc_free(lines); + return 0; +} + +void ctdb_load_nodes_file(struct ctdb_context *ctdb) +{ + int ret; + + ret = ctdb_set_nlist(ctdb, ctdb->nodes_file); + if (ret == -1) { + DEBUG(DEBUG_ALERT,("ctdb_set_nlist failed - %s\n", ctdb_errstr(ctdb))); + exit(1); + } +} + +/* + setup the local node address +*/ +int ctdb_set_address(struct ctdb_context *ctdb, const char *address) +{ + if (ctdb_parse_address(ctdb, ctdb, address, &ctdb->address) != 0) { + return -1; + } + + ctdb->name = talloc_asprintf(ctdb, "%s:%u", + ctdb->address.address, + ctdb->address.port); + return 0; +} + + +/* + return the number of active nodes +*/ +uint32_t ctdb_get_num_active_nodes(struct ctdb_context *ctdb) +{ + int i; + uint32_t count=0; + for (i=0; i < ctdb->num_nodes; i++) { + if (!(ctdb->nodes[i]->flags & NODE_FLAGS_INACTIVE)) { + count++; + } + } + return count; +} + + +/* + called when we need to process a packet. This can be a requeued packet + after a lockwait, or a real packet from another node +*/ +void ctdb_input_pkt(struct ctdb_context *ctdb, struct ctdb_req_header *hdr) +{ + TALLOC_CTX *tmp_ctx; + + /* place the packet as a child of the tmp_ctx. We then use + talloc_free() below to free it. If any of the calls want + to keep it, then they will steal it somewhere else, and the + talloc_free() will only free the tmp_ctx */ + tmp_ctx = talloc_new(ctdb); + talloc_steal(tmp_ctx, hdr); + + DEBUG(DEBUG_DEBUG,(__location__ " ctdb request %u of type %u length %u from " + "node %u to %u\n", hdr->reqid, hdr->operation, hdr->length, + hdr->srcnode, hdr->destnode)); + + switch (hdr->operation) { + case CTDB_REQ_CALL: + case CTDB_REPLY_CALL: + case CTDB_REQ_DMASTER: + case CTDB_REPLY_DMASTER: + /* we dont allow these calls when banned */ + if (ctdb->nodes[ctdb->pnn]->flags & NODE_FLAGS_BANNED) { + DEBUG(DEBUG_DEBUG,(__location__ " ctdb operation %u" + " request %u" + " length %u from node %u to %u while node" + " is banned\n", + hdr->operation, hdr->reqid, + hdr->length, + hdr->srcnode, hdr->destnode)); + goto done; + } + + /* for ctdb_call inter-node operations verify that the + remote node that sent us the call is running in the + same generation instance as this node + */ + if (ctdb->vnn_map->generation != hdr->generation) { + DEBUG(DEBUG_DEBUG,(__location__ " ctdb operation %u" + " request %u" + " length %u from node %u to %u had an" + " invalid generation id:%u while our" + " generation id is:%u\n", + hdr->operation, hdr->reqid, + hdr->length, + hdr->srcnode, hdr->destnode, + hdr->generation, ctdb->vnn_map->generation)); + goto done; + } + } + + switch (hdr->operation) { + case CTDB_REQ_CALL: + CTDB_INCREMENT_STAT(ctdb, node.req_call); + ctdb_request_call(ctdb, hdr); + break; + + case CTDB_REPLY_CALL: + CTDB_INCREMENT_STAT(ctdb, node.reply_call); + ctdb_reply_call(ctdb, hdr); + break; + + case CTDB_REPLY_ERROR: + CTDB_INCREMENT_STAT(ctdb, node.reply_error); + ctdb_reply_error(ctdb, hdr); + break; + + case CTDB_REQ_DMASTER: + CTDB_INCREMENT_STAT(ctdb, node.req_dmaster); + ctdb_request_dmaster(ctdb, hdr); + break; + + case CTDB_REPLY_DMASTER: + CTDB_INCREMENT_STAT(ctdb, node.reply_dmaster); + ctdb_reply_dmaster(ctdb, hdr); + break; + + case CTDB_REQ_MESSAGE: + CTDB_INCREMENT_STAT(ctdb, node.req_message); + ctdb_request_message(ctdb, hdr); + break; + + case CTDB_REQ_CONTROL: + CTDB_INCREMENT_STAT(ctdb, node.req_control); + ctdb_request_control(ctdb, hdr); + break; + + case CTDB_REPLY_CONTROL: + CTDB_INCREMENT_STAT(ctdb, node.reply_control); + ctdb_reply_control(ctdb, hdr); + break; + + case CTDB_REQ_KEEPALIVE: + CTDB_INCREMENT_STAT(ctdb, keepalive_packets_recv); + break; + + default: + DEBUG(DEBUG_CRIT,("%s: Packet with unknown operation %u\n", + __location__, hdr->operation)); + break; + } + +done: + talloc_free(tmp_ctx); +} + + +/* + called by the transport layer when a node is dead +*/ +void ctdb_node_dead(struct ctdb_node *node) +{ + if (node->flags & NODE_FLAGS_DISCONNECTED) { + DEBUG(DEBUG_INFO,("%s: node %s is already marked disconnected: %u connected\n", + node->ctdb->name, node->name, + node->ctdb->num_connected)); + return; + } + node->ctdb->num_connected--; + node->flags |= NODE_FLAGS_DISCONNECTED | NODE_FLAGS_UNHEALTHY; + node->rx_cnt = 0; + node->dead_count = 0; + + DEBUG(DEBUG_NOTICE,("%s: node %s is dead: %u connected\n", + node->ctdb->name, node->name, node->ctdb->num_connected)); + ctdb_daemon_cancel_controls(node->ctdb, node); + + if (node->ctdb->methods == NULL) { + DEBUG(DEBUG_ERR,(__location__ " Can not restart transport while shutting down daemon.\n")); + return; + } + + node->ctdb->methods->restart(node); +} + +/* + called by the transport layer when a node is connected +*/ +void ctdb_node_connected(struct ctdb_node *node) +{ + if (!(node->flags & NODE_FLAGS_DISCONNECTED)) { + DEBUG(DEBUG_INFO,("%s: node %s is already marked connected: %u connected\n", + node->ctdb->name, node->name, + node->ctdb->num_connected)); + return; + } + node->ctdb->num_connected++; + node->dead_count = 0; + node->flags &= ~NODE_FLAGS_DISCONNECTED; + node->flags |= NODE_FLAGS_UNHEALTHY; + DEBUG(DEBUG_NOTICE, + ("%s: connected to %s - %u connected\n", + node->ctdb->name, node->name, node->ctdb->num_connected)); +} + +struct queue_next { + struct ctdb_context *ctdb; + struct ctdb_req_header *hdr; +}; + + +/* + triggered when a deferred packet is due + */ +static void queue_next_trigger(struct event_context *ev, struct timed_event *te, + struct timeval t, void *private_data) +{ + struct queue_next *q = talloc_get_type(private_data, struct queue_next); + ctdb_input_pkt(q->ctdb, q->hdr); + talloc_free(q); +} + +/* + defer a packet, so it is processed on the next event loop + this is used for sending packets to ourselves + */ +static void ctdb_defer_packet(struct ctdb_context *ctdb, struct ctdb_req_header *hdr) +{ + struct queue_next *q; + q = talloc(ctdb, struct queue_next); + if (q == NULL) { + DEBUG(DEBUG_ERR,(__location__ " Failed to allocate deferred packet\n")); + return; + } + q->ctdb = ctdb; + q->hdr = talloc_memdup(ctdb, hdr, hdr->length); + if (q->hdr == NULL) { + DEBUG(DEBUG_ERR,("Error copying deferred packet to self\n")); + return; + } +#if 0 + /* use this to put packets directly into our recv function */ + ctdb_input_pkt(q->ctdb, q->hdr); +#else + event_add_timed(ctdb->ev, q, timeval_zero(), queue_next_trigger, q); +#endif +} + + +/* + broadcast a packet to all nodes +*/ +static void ctdb_broadcast_packet_all(struct ctdb_context *ctdb, + struct ctdb_req_header *hdr) +{ + int i; + for (i=0; i < ctdb->num_nodes; i++) { + if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) { + continue; + } + hdr->destnode = ctdb->nodes[i]->pnn; + ctdb_queue_packet(ctdb, hdr); + } +} + +/* + broadcast a packet to all nodes in the current vnnmap +*/ +static void ctdb_broadcast_packet_vnnmap(struct ctdb_context *ctdb, + struct ctdb_req_header *hdr) +{ + int i; + for (i=0;i<ctdb->vnn_map->size;i++) { + hdr->destnode = ctdb->vnn_map->map[i]; + ctdb_queue_packet(ctdb, hdr); + } +} + +/* + broadcast a packet to all connected nodes +*/ +static void ctdb_broadcast_packet_connected(struct ctdb_context *ctdb, + struct ctdb_req_header *hdr) +{ + int i; + for (i=0; i < ctdb->num_nodes; i++) { + if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) { + continue; + } + if (!(ctdb->nodes[i]->flags & NODE_FLAGS_DISCONNECTED)) { + hdr->destnode = ctdb->nodes[i]->pnn; + ctdb_queue_packet(ctdb, hdr); + } + } +} + +/* + queue a packet or die +*/ +void ctdb_queue_packet(struct ctdb_context *ctdb, struct ctdb_req_header *hdr) +{ + struct ctdb_node *node; + + switch (hdr->destnode) { + case CTDB_BROADCAST_ALL: + ctdb_broadcast_packet_all(ctdb, hdr); + return; + case CTDB_BROADCAST_VNNMAP: + ctdb_broadcast_packet_vnnmap(ctdb, hdr); + return; + case CTDB_BROADCAST_CONNECTED: + ctdb_broadcast_packet_connected(ctdb, hdr); + return; + } + + CTDB_INCREMENT_STAT(ctdb, node_packets_sent); + + if (!ctdb_validate_pnn(ctdb, hdr->destnode)) { + DEBUG(DEBUG_CRIT,(__location__ " cant send to node %u that does not exist\n", + hdr->destnode)); + return; + } + + node = ctdb->nodes[hdr->destnode]; + + if (node->flags & NODE_FLAGS_DELETED) { + DEBUG(DEBUG_ERR, (__location__ " Can not queue packet to DELETED node %d\n", hdr->destnode)); + return; + } + + if (node->pnn == ctdb->pnn) { + ctdb_defer_packet(ctdb, hdr); + return; + } + + if (ctdb->methods == NULL) { + DEBUG(DEBUG_ALERT, (__location__ " Can not queue packet. " + "Transport is DOWN\n")); + return; + } + + node->tx_cnt++; + if (ctdb->methods->queue_pkt(node, (uint8_t *)hdr, hdr->length) != 0) { + ctdb_fatal(ctdb, "Unable to queue packet\n"); + } +} + + + + +/* + a valgrind hack to allow us to get opcode specific backtraces + very ugly, and relies on no compiler optimisation! +*/ +void ctdb_queue_packet_opcode(struct ctdb_context *ctdb, struct ctdb_req_header *hdr, unsigned opcode) +{ + switch (opcode) { +#define DO_OP(x) case x: ctdb_queue_packet(ctdb, hdr); break + DO_OP(1); + DO_OP(2); + DO_OP(3); + DO_OP(4); + DO_OP(5); + DO_OP(6); + DO_OP(7); + DO_OP(8); + DO_OP(9); + DO_OP(10); + DO_OP(11); + DO_OP(12); + DO_OP(13); + DO_OP(14); + DO_OP(15); + DO_OP(16); + DO_OP(17); + DO_OP(18); + DO_OP(19); + DO_OP(20); + DO_OP(21); + DO_OP(22); + DO_OP(23); + DO_OP(24); + DO_OP(25); + DO_OP(26); + DO_OP(27); + DO_OP(28); + DO_OP(29); + DO_OP(30); + DO_OP(31); + DO_OP(32); + DO_OP(33); + DO_OP(34); + DO_OP(35); + DO_OP(36); + DO_OP(37); + DO_OP(38); + DO_OP(39); + DO_OP(40); + DO_OP(41); + DO_OP(42); + DO_OP(43); + DO_OP(44); + DO_OP(45); + DO_OP(46); + DO_OP(47); + DO_OP(48); + DO_OP(49); + DO_OP(50); + DO_OP(51); + DO_OP(52); + DO_OP(53); + DO_OP(54); + DO_OP(55); + DO_OP(56); + DO_OP(57); + DO_OP(58); + DO_OP(59); + DO_OP(60); + DO_OP(61); + DO_OP(62); + DO_OP(63); + DO_OP(64); + DO_OP(65); + DO_OP(66); + DO_OP(67); + DO_OP(68); + DO_OP(69); + DO_OP(70); + DO_OP(71); + DO_OP(72); + DO_OP(73); + DO_OP(74); + DO_OP(75); + DO_OP(76); + DO_OP(77); + DO_OP(78); + DO_OP(79); + DO_OP(80); + DO_OP(81); + DO_OP(82); + DO_OP(83); + DO_OP(84); + DO_OP(85); + DO_OP(86); + DO_OP(87); + DO_OP(88); + DO_OP(89); + DO_OP(90); + DO_OP(91); + DO_OP(92); + DO_OP(93); + DO_OP(94); + DO_OP(95); + DO_OP(96); + DO_OP(97); + DO_OP(98); + DO_OP(99); + DO_OP(100); + default: + ctdb_queue_packet(ctdb, hdr); + break; + } +} |