summary refs log tree commit diff stats
path: root/ctdb
diff options
context:
space:
mode:
Diffstat (limited to 'ctdb')
-rw-r--r-- ctdb/client/ctdb_client.c | 18
-rw-r--r-- ctdb/include/ctdb.h | 2
-rw-r--r-- ctdb/include/ctdb_private.h | 4
-rw-r--r-- ctdb/server/ctdb_control.c | 4
-rw-r--r-- ctdb/server/ctdb_daemon.c | 3
-rw-r--r-- ctdb/server/ctdb_recover.c | 38
-rw-r--r-- ctdb/server/ctdb_recoverd.c | 3
-rw-r--r-- ctdb/server/ctdb_tunables.c | 1
8 files changed, 73 insertions, 0 deletions
diff --git a/ctdb/client/ctdb_client.c b/ctdb/client/ctdb_client.c
index dfcd4d90e7..6d80efc205 100644
--- a/ctdb/client/ctdb_client.c
+++ b/ctdb/client/ctdb_client.c
@@ -3280,3 +3280,21 @@ again:
talloc_free(h);
return 0;
}
+
+/* Recovery daemon ping to main daemon: sends CTDB_CONTROL_RECD_PING with the
+ tdb_null payload to CTDB_CURRENT_NODE through ctdb_control().
+ Returns 0 on success, -1 if the call fails or the control status is non-zero. */
+int ctdb_ctrl_recd_ping(struct ctdb_context *ctdb)
+{
+ int ret;
+ int32_t res; /* status slot handed to ctdb_control(); non-zero is treated as failure */
+
+ ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_RECD_PING, 0, tdb_null,
+ ctdb, NULL, &res, NULL, NULL);
+ if (ret != 0 || res != 0) {
+ DEBUG(DEBUG_ERR,("Failed to send recd ping\n"));
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/ctdb/include/ctdb.h b/ctdb/include/ctdb.h
index d43ab50707..60fa60be58 100644
--- a/ctdb/include/ctdb.h
+++ b/ctdb/include/ctdb.h
@@ -566,4 +566,6 @@ int ctdb_transaction_store(struct ctdb_transaction_handle *h,
TDB_DATA key, TDB_DATA data);
int ctdb_transaction_commit(struct ctdb_transaction_handle *h);
+int ctdb_ctrl_recd_ping(struct ctdb_context *ctdb);
+
#endif
diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h
index a25674c9b4..b2ded310b5 100644
--- a/ctdb/include/ctdb_private.h
+++ b/ctdb/include/ctdb_private.h
@@ -114,6 +114,7 @@ struct ctdb_tunable {
uint32_t reclock_ping_period;
uint32_t no_ip_failback;
uint32_t verbose_memory_names;
+ uint32_t recd_ping_timeout;
};
/*
@@ -417,6 +418,7 @@ struct ctdb_context {
int start_as_disabled;
uint32_t event_script_timeouts; /* counting how many consecutive times an eventscript has timedout */
TALLOC_CTX *eventscripts_ctx; /* a context to hold data for the RUN_EVENTSCRIPTS control */
+ TALLOC_CTX *recd_ping_ctx;
};
struct ctdb_db_context {
@@ -550,6 +552,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0,
CTDB_CONTROL_TRANS2_FINISHED = 84,
CTDB_CONTROL_TRANS2_ERROR = 85,
CTDB_CONTROL_TRANS2_COMMIT_RETRY = 86,
+ CTDB_CONTROL_RECD_PING = 87,
};
/*
@@ -1378,5 +1381,6 @@ int32_t ctdb_control_trans2_error(struct ctdb_context *ctdb,
char *ctdb_addr_to_str(ctdb_sock_addr *addr);
void ctdb_canonicalize_ip(const ctdb_sock_addr *ip, ctdb_sock_addr *cip);
+int32_t ctdb_control_recd_ping(struct ctdb_context *ctdb);
#endif
diff --git a/ctdb/server/ctdb_control.c b/ctdb/server/ctdb_control.c
index 4128797866..94736fb568 100644
--- a/ctdb/server/ctdb_control.c
+++ b/ctdb/server/ctdb_control.c
@@ -406,6 +406,10 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
case CTDB_CONTROL_TRANS2_FINISHED:
return ctdb_control_trans2_finished(ctdb, c);
+ case CTDB_CONTROL_RECD_PING:
+ CHECK_CONTROL_DATA_SIZE(0);
+ return ctdb_control_recd_ping(ctdb);
+
default:
DEBUG(DEBUG_CRIT,(__location__ " Unknown CTDB control opcode %u\n", opcode));
return -1;
diff --git a/ctdb/server/ctdb_daemon.c b/ctdb/server/ctdb_daemon.c
index efe3d75349..885ce7e6f6 100644
--- a/ctdb/server/ctdb_daemon.c
+++ b/ctdb/server/ctdb_daemon.c
@@ -103,6 +103,9 @@ static void ctdb_start_transport(struct ctdb_context *ctdb)
/* start periodic update of tcp tickle lists */
ctdb_start_tcp_tickle_update(ctdb);
+
+ /* start listening for recovery daemon pings */
+ ctdb_control_recd_ping(ctdb);
}
static void block_signal(int signum)
diff --git a/ctdb/server/ctdb_recover.c b/ctdb/server/ctdb_recover.c
index 3243f42faa..6b207d55bc 100644
--- a/ctdb/server/ctdb_recover.c
+++ b/ctdb/server/ctdb_recover.c
@@ -971,3 +971,41 @@ int32_t ctdb_control_get_capabilities(struct ctdb_context *ctdb, TDB_DATA *outda
return 0;
}
+static void ctdb_recd_ping_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *p)
+{
+ struct ctdb_context *ctdb = talloc_get_type(p, struct ctdb_context); /* p is the ctdb context passed when the timer was armed */
+ /* Timed-event callback registered by ctdb_control_recd_ping(); it logs, tears the daemon down and exits. */
+ DEBUG(DEBUG_ERR, (__location__ " Recovery daemon ping timeout. Shutting down ctdb daemon\n"));
+ /* Teardown order: recoverd, keepalive, monitoring, release all IPs, methods->shutdown() when methods is set, then the "shutdown" event script. */
+ ctdb_stop_recoverd(ctdb);
+ ctdb_stop_keepalive(ctdb);
+ ctdb_stop_monitoring(ctdb);
+ ctdb_release_all_ips(ctdb);
+ if (ctdb->methods != NULL) {
+ ctdb->methods->shutdown(ctdb);
+ }
+ ctdb_event_script(ctdb, "shutdown");
+ DEBUG(DEBUG_ERR, (__location__ " Recovery daemon ping timeout. Daemon has been shut down.\n"));
+ exit(0); /* NOTE(review): exit status is 0 although this is a failure path -- confirm this is intended */
+}
+
+/* The recovery daemon will ping us at regular intervals.
+ If we haven't been pinged for a while we assume the recovery
+ daemon is inoperable and we shut down. Each call replaces the timer context and re-arms the timer.
+*/
+int32_t ctdb_control_recd_ping(struct ctdb_context *ctdb)
+{
+ talloc_free(ctdb->recd_ping_ctx); /* frees the context the previous timer was created on; presumably this cancels that timer -- confirm */
+ /* fresh context, allocated as a child of ctdb, to own the next timer */
+ ctdb->recd_ping_ctx = talloc_new(ctdb);
+ CTDB_NO_MEMORY(ctdb, ctdb->recd_ping_ctx); /* macro defined elsewhere; presumably bails out with an error when the allocation failed */
+ /* a recd_ping_timeout tunable ("RecdPingTimeout") of 0 means no timer is armed */
+ if (ctdb->tunable.recd_ping_timeout != 0) {
+ event_add_timed(ctdb->ev, ctdb->recd_ping_ctx,
+ timeval_current_ofs(ctdb->tunable.recd_ping_timeout, 0),
+ ctdb_recd_ping_timeout, ctdb); /* NOTE(review): the result of event_add_timed() is not checked */
+ }
+
+ return 0;
+}
+
diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c
index c6a4ab322a..a8c004ae0c 100644
--- a/ctdb/server/ctdb_recoverd.c
+++ b/ctdb/server/ctdb_recoverd.c
@@ -2317,6 +2317,9 @@ again:
exit(-1);
}
+ /* ping the local daemon to tell it we are alive */
+ ctdb_ctrl_recd_ping(ctdb);
+
if (rec->election_timeout) {
/* an election is in progress */
goto again;
diff --git a/ctdb/server/ctdb_tunables.c b/ctdb/server/ctdb_tunables.c
index d138137afd..de3e46667c 100644
--- a/ctdb/server/ctdb_tunables.c
+++ b/ctdb/server/ctdb_tunables.c
@@ -50,6 +50,7 @@ static const struct {
{ "ReclockPingPeriod", 60, offsetof(struct ctdb_tunable, reclock_ping_period) },
{ "NoIPFailback", 0, offsetof(struct ctdb_tunable, no_ip_failback) },
{ "VerboseMemoryNames", 0, offsetof(struct ctdb_tunable, verbose_memory_names) },
+ { "RecdPingTimeout", 60, offsetof(struct ctdb_tunable, recd_ping_timeout) },
};
/*