diff options
| author | Andrew Tridgell <tridge@samba.org> | 2007-05-18 23:23:36 +1000 |
|---|---|---|
| committer | Andrew Tridgell <tridge@samba.org> | 2007-05-18 23:23:36 +1000 |
| commit | 346dfc1bef22c6ee41d1ec7cdee19c1a0fbd11d6 (patch) | |
| tree | 06e1605e3d8cf2b9e15b4e090cb9de98e228a8e5 | |
| parent | 9ea6e3ca8b05a2e291654d9fe01c3fa4dbec5c30 (diff) | |
- increment rx_cnt for every packet type received from a node, not only keepalives
- notice when a node becomes available again and mark it as connected
(This used to be ctdb commit e05110dd6112e81f224937dfd7370d963ce9531a)
| -rw-r--r-- | ctdb/common/ctdb.c | 9 | ||||
| -rw-r--r-- | ctdb/common/ctdb_call.c | 6 | ||||
| -rw-r--r-- | ctdb/common/ctdb_monitor.c | 64 | ||||
| -rw-r--r-- | ctdb/include/ctdb_private.h | 6 |
4 files changed, 35 insertions, 50 deletions
diff --git a/ctdb/common/ctdb.c b/ctdb/common/ctdb.c index b5829e55d7..5471463105 100644 --- a/ctdb/common/ctdb.c +++ b/ctdb/common/ctdb.c @@ -116,8 +116,7 @@ static int ctdb_add_node(struct ctdb_context *ctdb, char *nstr) node->name = talloc_asprintf(node, "%s:%u", node->address.address, node->address.port); - /* for now we just set the vnn to the line in the file - this - will change! */ + /* this assumes that the nodes are kept in sorted order, and no gaps */ node->vnn = ctdb->num_nodes; if (ctdb->address.address && @@ -275,6 +274,11 @@ void ctdb_recv_pkt(struct ctdb_context *ctdb, uint8_t *data, uint32_t length) "node %d to %d\n", hdr->reqid, hdr->operation, hdr->length, hdr->srcnode, hdr->destnode)); + /* up the counter for this source node, so we know its alive */ + if (ctdb_validate_vnn(ctdb, hdr->srcnode)) { + ctdb->nodes[hdr->srcnode]->rx_cnt++; + } + switch (hdr->operation) { case CTDB_REQ_CALL: case CTDB_REPLY_CALL: @@ -345,7 +349,6 @@ void ctdb_recv_pkt(struct ctdb_context *ctdb, uint8_t *data, uint32_t length) case CTDB_REQ_KEEPALIVE: ctdb->status.keepalive_packets_recv++; - ctdb_request_keepalive(ctdb, hdr); break; default: diff --git a/ctdb/common/ctdb_call.c b/ctdb/common/ctdb_call.c index fadbfac947..cd7244ff15 100644 --- a/ctdb/common/ctdb_call.c +++ b/ctdb/common/ctdb_call.c @@ -785,13 +785,11 @@ int ctdb_daemon_call_recv(struct ctdb_call_state *state, struct ctdb_call *call) /* send a keepalive packet to the other node */ -void ctdb_send_keepalive(struct ctdb_context *ctdb, - TALLOC_CTX *mem_ctx, - uint32_t destnode) +void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode) { struct ctdb_req_keepalive *r; - r = ctdb_transport_allocate(ctdb, mem_ctx, CTDB_REQ_KEEPALIVE, + r = ctdb_transport_allocate(ctdb, ctdb, CTDB_REQ_KEEPALIVE, sizeof(struct ctdb_req_keepalive), struct ctdb_req_keepalive); CTDB_NO_MEMORY_FATAL(ctdb, r); diff --git a/ctdb/common/ctdb_monitor.c b/ctdb/common/ctdb_monitor.c index 3f8b68128e..ff2046ed8a 100644 --- 
a/ctdb/common/ctdb_monitor.c +++ b/ctdb/common/ctdb_monitor.c @@ -26,73 +26,55 @@ #include "../include/ctdb_private.h" /* - called when a CTDB_REQ_KEEPALIVE packet comes in -*/ -void ctdb_request_keepalive(struct ctdb_context *ctdb, struct ctdb_req_header *hdr) -{ - struct ctdb_req_keepalive *r = (struct ctdb_req_keepalive *)hdr; - struct ctdb_node *node = NULL; - int i; - - for (i=0;i<ctdb->num_nodes;i++) { - if (ctdb->nodes[i]->vnn == r->hdr.srcnode) { - node = ctdb->nodes[i]; - break; - } - } - if (!node) { - DEBUG(0,(__location__ " Keepalive received from node not in ctdb->nodes : %u\n", r->hdr.srcnode)); - return; - } - - node->rx_cnt++; -} - - + see if any nodes are dead + */ static void ctdb_check_for_dead_nodes(struct event_context *ev, struct timed_event *te, struct timeval t, void *private_data) { struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context); int i; - TALLOC_CTX *mem_ctx = talloc_new(ctdb); /* send a keepalive to all other nodes, unless */ for (i=0;i<ctdb->num_nodes;i++) { - if (!(ctdb->nodes[i]->flags & NODE_FLAGS_CONNECTED)) { + struct ctdb_node *node = ctdb->nodes[i]; + if (node->vnn == ctdb->vnn) { continue; } - if (ctdb->nodes[i]->vnn == ctdb_get_vnn(ctdb)) { - continue; + + /* it might have come alive again */ + if (!(node->flags & NODE_FLAGS_CONNECTED) && node->rx_cnt != 0) { + DEBUG(0,("Node %u is alive again - marking as connected\n", node->vnn)); + node->flags |= NODE_FLAGS_CONNECTED; } - if (ctdb->nodes[i]->rx_cnt == 0) { - ctdb->nodes[i]->dead_count++; + if (node->rx_cnt == 0) { + node->dead_count++; } else { - ctdb->nodes[i]->dead_count = 0; + node->dead_count = 0; } - if (ctdb->nodes[i]->dead_count>=3) { - ctdb->nodes[i]->flags &= ~NODE_FLAGS_CONNECTED; - /* should probably tell the transport layer - to kill the sockets as well + node->rx_cnt = 0; + + if (node->dead_count >= CTDB_MONITORING_DEAD_COUNT) { + DEBUG(0,("Node %u is dead - marking as not connected\n", node->vnn)); + node->flags &= 
~NODE_FLAGS_CONNECTED; + /* maybe tell the transport layer to kill the + sockets as well? */ continue; } - ctdb_send_keepalive(ctdb, mem_ctx, i); - ctdb->nodes[i]->rx_cnt = 0; + ctdb_send_keepalive(ctdb, node->vnn); } - - - - talloc_free(mem_ctx); - event_add_timed(ctdb->ev, ctdb, timeval_current_ofs(CTDB_MONITORING_TIMEOUT, 0), ctdb_check_for_dead_nodes, ctdb); } +/* + start watching for nodes that might be dead + */ int ctdb_start_monitoring(struct ctdb_context *ctdb) { event_add_timed(ctdb->ev, ctdb, diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index 47d0fbb991..821a99efd4 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -311,6 +311,9 @@ struct ctdb_db_context { /* timeout between dead-node monitoring events */ #define CTDB_MONITORING_TIMEOUT 5 +/* number of monitoring timeouts before a node is considered dead */ +#define CTDB_MONITORING_DEAD_COUNT 3 + /* number of consecutive calls from the same node before we give them the record */ @@ -710,7 +713,6 @@ void *_ctdb_reqid_find(struct ctdb_context *ctdb, uint32_t reqid, const char *ty void ctdb_reqid_remove(struct ctdb_context *ctdb, uint32_t reqid); void ctdb_request_control(struct ctdb_context *ctdb, struct ctdb_req_header *hdr); -void ctdb_request_keepalive(struct ctdb_context *ctdb, struct ctdb_req_header *hdr); void ctdb_reply_control(struct ctdb_context *ctdb, struct ctdb_req_header *hdr); int ctdb_daemon_send_control(struct ctdb_context *ctdb, uint32_t destnode, @@ -819,6 +821,6 @@ int ctdb_start_recoverd(struct ctdb_context *ctdb); uint32_t ctdb_get_num_connected_nodes(struct ctdb_context *ctdb); int ctdb_start_monitoring(struct ctdb_context *ctdb); -void ctdb_send_keepalive(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, uint32_t destnode); +void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode); #endif |
