diff options
Diffstat (limited to 'ctdb')
-rw-r--r-- | ctdb/client/ctdb_client.c | 18 | ||||
-rw-r--r-- | ctdb/include/ctdb.h | 2 | ||||
-rw-r--r-- | ctdb/include/ctdb_private.h | 4 | ||||
-rw-r--r-- | ctdb/server/ctdb_control.c | 4 | ||||
-rw-r--r-- | ctdb/server/ctdb_daemon.c | 3 | ||||
-rw-r--r-- | ctdb/server/ctdb_recover.c | 38 | ||||
-rw-r--r-- | ctdb/server/ctdb_recoverd.c | 3 | ||||
-rw-r--r-- | ctdb/server/ctdb_tunables.c | 1 |
8 files changed, 73 insertions, 0 deletions
diff --git a/ctdb/client/ctdb_client.c b/ctdb/client/ctdb_client.c index dfcd4d90e7..6d80efc205 100644 --- a/ctdb/client/ctdb_client.c +++ b/ctdb/client/ctdb_client.c @@ -3280,3 +3280,21 @@ again: talloc_free(h); return 0; } + +/* + recovery daemon ping to main daemon: sends CTDB_CONTROL_RECD_PING to the current node; returns 0 on success, -1 if the control or its result fails + */ +int ctdb_ctrl_recd_ping(struct ctdb_context *ctdb) +{ + int ret; + int32_t res; + + ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_RECD_PING, 0, tdb_null, + ctdb, NULL, &res, NULL, NULL); + if (ret != 0 || res != 0) { + DEBUG(DEBUG_ERR,("Failed to send recd ping\n")); + return -1; + } + + return 0; +} diff --git a/ctdb/include/ctdb.h b/ctdb/include/ctdb.h index d43ab50707..60fa60be58 100644 --- a/ctdb/include/ctdb.h +++ b/ctdb/include/ctdb.h @@ -566,4 +566,6 @@ int ctdb_transaction_store(struct ctdb_transaction_handle *h, TDB_DATA key, TDB_DATA data); int ctdb_transaction_commit(struct ctdb_transaction_handle *h); +int ctdb_ctrl_recd_ping(struct ctdb_context *ctdb); + #endif diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index a25674c9b4..b2ded310b5 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -114,6 +114,7 @@ struct ctdb_tunable { uint32_t reclock_ping_period; uint32_t no_ip_failback; uint32_t verbose_memory_names; + uint32_t recd_ping_timeout; }; /* @@ -417,6 +418,7 @@ struct ctdb_context { int start_as_disabled; uint32_t event_script_timeouts; /* counting how many consecutive times an eventscript has timedout */ TALLOC_CTX *eventscripts_ctx; /* a context to hold data for the RUN_EVENTSCRIPTS control */ + TALLOC_CTX *recd_ping_ctx; }; struct ctdb_db_context { @@ -550,6 +552,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0, CTDB_CONTROL_TRANS2_FINISHED = 84, CTDB_CONTROL_TRANS2_ERROR = 85, CTDB_CONTROL_TRANS2_COMMIT_RETRY = 86, + CTDB_CONTROL_RECD_PING = 87, }; /* @@ -1378,5 +1381,6 @@ int32_t ctdb_control_trans2_error(struct ctdb_context *ctdb, char *ctdb_addr_to_str(ctdb_sock_addr *addr); void 
ctdb_canonicalize_ip(const ctdb_sock_addr *ip, ctdb_sock_addr *cip); +int32_t ctdb_control_recd_ping(struct ctdb_context *ctdb); #endif diff --git a/ctdb/server/ctdb_control.c b/ctdb/server/ctdb_control.c index 4128797866..94736fb568 100644 --- a/ctdb/server/ctdb_control.c +++ b/ctdb/server/ctdb_control.c @@ -406,6 +406,10 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb, case CTDB_CONTROL_TRANS2_FINISHED: return ctdb_control_trans2_finished(ctdb, c); + case CTDB_CONTROL_RECD_PING: + CHECK_CONTROL_DATA_SIZE(0); + return ctdb_control_recd_ping(ctdb); + default: DEBUG(DEBUG_CRIT,(__location__ " Unknown CTDB control opcode %u\n", opcode)); return -1; diff --git a/ctdb/server/ctdb_daemon.c b/ctdb/server/ctdb_daemon.c index efe3d75349..885ce7e6f6 100644 --- a/ctdb/server/ctdb_daemon.c +++ b/ctdb/server/ctdb_daemon.c @@ -103,6 +103,9 @@ static void ctdb_start_transport(struct ctdb_context *ctdb) /* start periodic update of tcp tickle lists */ ctdb_start_tcp_tickle_update(ctdb); + + /* start listening for recovery daemon pings */ + ctdb_control_recd_ping(ctdb); } static void block_signal(int signum) diff --git a/ctdb/server/ctdb_recover.c b/ctdb/server/ctdb_recover.c index 3243f42faa..6b207d55bc 100644 --- a/ctdb/server/ctdb_recover.c +++ b/ctdb/server/ctdb_recover.c @@ -971,3 +971,41 @@ int32_t ctdb_control_get_capabilities(struct ctdb_context *ctdb, TDB_DATA *outda return 0; } +static void ctdb_recd_ping_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *p) +{ + struct ctdb_context *ctdb = talloc_get_type(p, struct ctdb_context); + + DEBUG(DEBUG_ERR, (__location__ " Recovery daemon ping timeout. Shutting down ctdb daemon\n")); + + ctdb_stop_recoverd(ctdb); + ctdb_stop_keepalive(ctdb); + ctdb_stop_monitoring(ctdb); + ctdb_release_all_ips(ctdb); + if (ctdb->methods != NULL) { + ctdb->methods->shutdown(ctdb); + } + ctdb_event_script(ctdb, "shutdown"); + DEBUG(DEBUG_ERR, (__location__ " Recovery daemon ping timeout. 
Daemon has been shut down.\n")); + exit(0); +} + +/* The recovery daemon will ping us at regular intervals; each ping frees the previous timer context and arms a new timer. + If we have not been pinged for tunable.recd_ping_timeout (0 means no timer is armed) we assume the recovery + daemon is inoperable and we shut down. +*/ +int32_t ctdb_control_recd_ping(struct ctdb_context *ctdb) +{ + talloc_free(ctdb->recd_ping_ctx); + + ctdb->recd_ping_ctx = talloc_new(ctdb); + CTDB_NO_MEMORY(ctdb, ctdb->recd_ping_ctx); + + if (ctdb->tunable.recd_ping_timeout != 0) { + event_add_timed(ctdb->ev, ctdb->recd_ping_ctx, + timeval_current_ofs(ctdb->tunable.recd_ping_timeout, 0), + ctdb_recd_ping_timeout, ctdb); + } + + return 0; +} + diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index c6a4ab322a..a8c004ae0c 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -2317,6 +2317,9 @@ again: exit(-1); } + /* ping the local daemon to tell it we are alive */ + ctdb_ctrl_recd_ping(ctdb); + if (rec->election_timeout) { /* an election is in progress */ goto again; diff --git a/ctdb/server/ctdb_tunables.c b/ctdb/server/ctdb_tunables.c index d138137afd..de3e46667c 100644 --- a/ctdb/server/ctdb_tunables.c +++ b/ctdb/server/ctdb_tunables.c @@ -50,6 +50,7 @@ static const struct { { "ReclockPingPeriod", 60, offsetof(struct ctdb_tunable, reclock_ping_period) }, { "NoIPFailback", 0, offsetof(struct ctdb_tunable, no_ip_failback) }, { "VerboseMemoryNames", 0, offsetof(struct ctdb_tunable, verbose_memory_names) }, + { "RecdPingTimeout", 60, offsetof(struct ctdb_tunable, recd_ping_timeout) }, }; /* |