summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Andrew Tridgell <tridge@samba.org> 2007-05-18 23:23:36 +1000
committer: Andrew Tridgell <tridge@samba.org> 2007-05-18 23:23:36 +1000
commit: 346dfc1bef22c6ee41d1ec7cdee19c1a0fbd11d6 (patch)
tree: 06e1605e3d8cf2b9e15b4e090cb9de98e228a8e5
parent: 9ea6e3ca8b05a2e291654d9fe01c3fa4dbec5c30 (diff)
- up rx_cnt on all packet types
- notice when a node becomes available again (This used to be ctdb commit e05110dd6112e81f224937dfd7370d963ce9531a)
-rw-r--r-- ctdb/common/ctdb.c 9
-rw-r--r-- ctdb/common/ctdb_call.c 6
-rw-r--r-- ctdb/common/ctdb_monitor.c 64
-rw-r--r-- ctdb/include/ctdb_private.h 6
4 files changed, 35 insertions, 50 deletions
diff --git a/ctdb/common/ctdb.c b/ctdb/common/ctdb.c
index b5829e55d7..5471463105 100644
--- a/ctdb/common/ctdb.c
+++ b/ctdb/common/ctdb.c
@@ -116,8 +116,7 @@ static int ctdb_add_node(struct ctdb_context *ctdb, char *nstr)
node->name = talloc_asprintf(node, "%s:%u",
node->address.address,
node->address.port);
- /* for now we just set the vnn to the line in the file - this
- will change! */
+ /* this assumes that the nodes are kept in sorted order, and no gaps */
node->vnn = ctdb->num_nodes;
if (ctdb->address.address &&
@@ -275,6 +274,11 @@ void ctdb_recv_pkt(struct ctdb_context *ctdb, uint8_t *data, uint32_t length)
"node %d to %d\n", hdr->reqid, hdr->operation, hdr->length,
hdr->srcnode, hdr->destnode));
+ /* up the counter for this source node, so we know its alive */
+ if (ctdb_validate_vnn(ctdb, hdr->srcnode)) {
+ ctdb->nodes[hdr->srcnode]->rx_cnt++;
+ }
+
switch (hdr->operation) {
case CTDB_REQ_CALL:
case CTDB_REPLY_CALL:
@@ -345,7 +349,6 @@ void ctdb_recv_pkt(struct ctdb_context *ctdb, uint8_t *data, uint32_t length)
case CTDB_REQ_KEEPALIVE:
ctdb->status.keepalive_packets_recv++;
- ctdb_request_keepalive(ctdb, hdr);
break;
default:
diff --git a/ctdb/common/ctdb_call.c b/ctdb/common/ctdb_call.c
index fadbfac947..cd7244ff15 100644
--- a/ctdb/common/ctdb_call.c
+++ b/ctdb/common/ctdb_call.c
@@ -785,13 +785,11 @@ int ctdb_daemon_call_recv(struct ctdb_call_state *state, struct ctdb_call *call)
/*
send a keepalive packet to the other node
*/
-void ctdb_send_keepalive(struct ctdb_context *ctdb,
- TALLOC_CTX *mem_ctx,
- uint32_t destnode)
+void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode)
{
struct ctdb_req_keepalive *r;
- r = ctdb_transport_allocate(ctdb, mem_ctx, CTDB_REQ_KEEPALIVE,
+ r = ctdb_transport_allocate(ctdb, ctdb, CTDB_REQ_KEEPALIVE,
sizeof(struct ctdb_req_keepalive),
struct ctdb_req_keepalive);
CTDB_NO_MEMORY_FATAL(ctdb, r);
diff --git a/ctdb/common/ctdb_monitor.c b/ctdb/common/ctdb_monitor.c
index 3f8b68128e..ff2046ed8a 100644
--- a/ctdb/common/ctdb_monitor.c
+++ b/ctdb/common/ctdb_monitor.c
@@ -26,73 +26,55 @@
#include "../include/ctdb_private.h"
/*
- called when a CTDB_REQ_KEEPALIVE packet comes in
-*/
-void ctdb_request_keepalive(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
-{
- struct ctdb_req_keepalive *r = (struct ctdb_req_keepalive *)hdr;
- struct ctdb_node *node = NULL;
- int i;
-
- for (i=0;i<ctdb->num_nodes;i++) {
- if (ctdb->nodes[i]->vnn == r->hdr.srcnode) {
- node = ctdb->nodes[i];
- break;
- }
- }
- if (!node) {
- DEBUG(0,(__location__ " Keepalive received from node not in ctdb->nodes : %u\n", r->hdr.srcnode));
- return;
- }
-
- node->rx_cnt++;
-}
-
-
+ see if any nodes are dead
+ */
static void ctdb_check_for_dead_nodes(struct event_context *ev, struct timed_event *te,
struct timeval t, void *private_data)
{
struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
int i;
- TALLOC_CTX *mem_ctx = talloc_new(ctdb);
/* send a keepalive to all other nodes, unless */
for (i=0;i<ctdb->num_nodes;i++) {
- if (!(ctdb->nodes[i]->flags & NODE_FLAGS_CONNECTED)) {
+ struct ctdb_node *node = ctdb->nodes[i];
+ if (node->vnn == ctdb->vnn) {
continue;
}
- if (ctdb->nodes[i]->vnn == ctdb_get_vnn(ctdb)) {
- continue;
+
+ /* it might have come alive again */
+ if (!(node->flags & NODE_FLAGS_CONNECTED) && node->rx_cnt != 0) {
+ DEBUG(0,("Node %u is alive again - marking as connected\n", node->vnn));
+ node->flags |= NODE_FLAGS_CONNECTED;
}
- if (ctdb->nodes[i]->rx_cnt == 0) {
- ctdb->nodes[i]->dead_count++;
+ if (node->rx_cnt == 0) {
+ node->dead_count++;
} else {
- ctdb->nodes[i]->dead_count = 0;
+ node->dead_count = 0;
}
- if (ctdb->nodes[i]->dead_count>=3) {
- ctdb->nodes[i]->flags &= ~NODE_FLAGS_CONNECTED;
- /* should probably tell the transport layer
- to kill the sockets as well
+ node->rx_cnt = 0;
+
+ if (node->dead_count >= CTDB_MONITORING_DEAD_COUNT) {
+ DEBUG(0,("Node %u is dead - marking as not connected\n", node->vnn));
+ node->flags &= ~NODE_FLAGS_CONNECTED;
+ /* maybe tell the transport layer to kill the
+ sockets as well?
*/
continue;
}
- ctdb_send_keepalive(ctdb, mem_ctx, i);
- ctdb->nodes[i]->rx_cnt = 0;
+ ctdb_send_keepalive(ctdb, node->vnn);
}
-
-
-
- talloc_free(mem_ctx);
-
event_add_timed(ctdb->ev, ctdb,
timeval_current_ofs(CTDB_MONITORING_TIMEOUT, 0),
ctdb_check_for_dead_nodes, ctdb);
}
+/*
+ start watching for nodes that might be dead
+ */
int ctdb_start_monitoring(struct ctdb_context *ctdb)
{
event_add_timed(ctdb->ev, ctdb,
diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h
index 47d0fbb991..821a99efd4 100644
--- a/ctdb/include/ctdb_private.h
+++ b/ctdb/include/ctdb_private.h
@@ -311,6 +311,9 @@ struct ctdb_db_context {
/* timeout between dead-node monitoring events */
#define CTDB_MONITORING_TIMEOUT 5
+/* number of monitoring timeouts before a node is considered dead */
+#define CTDB_MONITORING_DEAD_COUNT 3
+
/* number of consecutive calls from the same node before we give them
the record */
@@ -710,7 +713,6 @@ void *_ctdb_reqid_find(struct ctdb_context *ctdb, uint32_t reqid, const char *ty
void ctdb_reqid_remove(struct ctdb_context *ctdb, uint32_t reqid);
void ctdb_request_control(struct ctdb_context *ctdb, struct ctdb_req_header *hdr);
-void ctdb_request_keepalive(struct ctdb_context *ctdb, struct ctdb_req_header *hdr);
void ctdb_reply_control(struct ctdb_context *ctdb, struct ctdb_req_header *hdr);
int ctdb_daemon_send_control(struct ctdb_context *ctdb, uint32_t destnode,
@@ -819,6 +821,6 @@ int ctdb_start_recoverd(struct ctdb_context *ctdb);
uint32_t ctdb_get_num_connected_nodes(struct ctdb_context *ctdb);
int ctdb_start_monitoring(struct ctdb_context *ctdb);
-void ctdb_send_keepalive(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, uint32_t destnode);
+void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode);
#endif