summaryrefslogtreecommitdiffstats
path: root/ctdb/server/ctdb_monitor.c
diff options
context:
space:
mode:
authorRonnie Sahlberg <ronniesahlberg@gmail.com>2009-12-01 13:19:58 +1100
committerRonnie Sahlberg <ronniesahlberg@gmail.com>2009-12-01 13:19:58 +1100
commit698a0e4e9a41ec1a4e3c26d61221b7921d993e28 (patch)
tree05a58909178635f4e4209d9e84a0bd9dec18fe7d /ctdb/server/ctdb_monitor.c
parent569001afd058254b90e58b8e61fb01105d05acf7 (diff)
downloadsamba-698a0e4e9a41ec1a4e3c26d61221b7921d993e28.tar.gz
samba-698a0e4e9a41ec1a4e3c26d61221b7921d993e28.tar.xz
samba-698a0e4e9a41ec1a4e3c26d61221b7921d993e28.zip
When starting up ctdbd, wait until all initial recoveries have finished
and until a full re-recovery timeout has elapsed since the last recovery finished without a new recovery being triggered, before we start up the services and start monitoring the node. (This used to be ctdb commit 821333afb458358f90446062b0242790695e5060)
Diffstat (limited to 'ctdb/server/ctdb_monitor.c')
-rw-r--r--ctdb/server/ctdb_monitor.c52
1 files changed, 47 insertions, 5 deletions
diff --git a/ctdb/server/ctdb_monitor.c b/ctdb/server/ctdb_monitor.c
index 437b9d894df..f4223772b6c 100644
--- a/ctdb/server/ctdb_monitor.c
+++ b/ctdb/server/ctdb_monitor.c
@@ -205,6 +205,51 @@ static void ctdb_startup_callback(struct ctdb_context *ctdb, int status, void *p
/*
+ timed-event callback: wait until we have finished initial recoveries before we start the monitoring events.
+ re-arms itself via event_add_timed until all three checks below pass, then schedules ctdb_check_health; ev, te and t are not used.
+ */
+static void ctdb_wait_until_recovered(struct event_context *ev, struct timed_event *te,
+ struct timeval t, void *private_data)
+{
+ struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context); /* private_data is read as the ctdb context */
+
+ DEBUG(DEBUG_NOTICE,("CTDB_WAIT_UNTIL_RECOVERED\n"));
+
+ if (ctdb->vnn_map->generation == INVALID_GENERATION) { /* check 1: the vnn map has no valid generation yet */
+ DEBUG(DEBUG_NOTICE,(__location__ " generation is INVALID. Wait one more second\n"));
+ event_add_timed(ctdb->ev, ctdb->monitor->monitor_context, /* re-arm this same callback on the monitor context */
+ timeval_current_ofs(1, 0),
+ ctdb_wait_until_recovered, ctdb);
+ return;
+ }
+
+ if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) { /* check 2: a recovery is still in progress */
+ DEBUG(DEBUG_NOTICE,(__location__ " in recovery. Wait one more second\n"));
+ event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
+ timeval_current_ofs(1, 0),
+ ctdb_wait_until_recovered, ctdb);
+ return;
+ }
+
+
+ if (timeval_elapsed(&ctdb->last_recovery_finished) < (ctdb->tunable.rerecovery_timeout + 3)) { /* check 3: last recovery finished too recently; NOTE(review): the reason for the extra 3 is not stated here */
+ DEBUG(DEBUG_NOTICE,(__location__ " wait for pending recoveries to end. Wait one more second.\n"));
+
+ event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
+ timeval_current_ofs(1, 0),
+ ctdb_wait_until_recovered, ctdb);
+ return;
+ }
+
+
+ DEBUG(DEBUG_NOTICE,(__location__ " Recoveries finished. Running the \"startup\" event.\n")); /* NOTE(review): the message names the "startup" event, but the call below only schedules ctdb_check_health - confirm intent */
+ event_add_timed(ctdb->ev, ctdb->monitor->monitor_context, /* NOTE(review): none of the event_add_timed results in this function are checked */
+ timeval_current_ofs(1, 0),
+ ctdb_check_health, ctdb);
+}
+
+
+/*
see if the event scripts think we are healthy
*/
static void ctdb_check_health(struct event_context *ev, struct timed_event *te,
@@ -301,8 +346,6 @@ void ctdb_stop_monitoring(struct ctdb_context *ctdb)
*/
void ctdb_start_monitoring(struct ctdb_context *ctdb)
{
- struct timed_event *te;
-
if (ctdb->monitor != NULL) {
return;
}
@@ -315,10 +358,9 @@ void ctdb_start_monitoring(struct ctdb_context *ctdb)
ctdb->monitor->monitor_context = talloc_new(ctdb->monitor);
CTDB_NO_MEMORY_FATAL(ctdb, ctdb->monitor->monitor_context);
- te = event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
+ event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
timeval_current_ofs(1, 0),
- ctdb_check_health, ctdb);
- CTDB_NO_MEMORY_FATAL(ctdb, te);
+ ctdb_wait_until_recovered, ctdb);
ctdb->monitor->monitoring_mode = CTDB_MONITORING_ACTIVE;
DEBUG(DEBUG_NOTICE,("Monitoring has been started\n"));