diff options
author | Ronnie Sahlberg <ronniesahlberg@gmail.com> | 2009-12-01 13:19:58 +1100 |
---|---|---|
committer | Ronnie Sahlberg <ronniesahlberg@gmail.com> | 2009-12-01 13:19:58 +1100 |
commit | 698a0e4e9a41ec1a4e3c26d61221b7921d993e28 (patch) | |
tree | 05a58909178635f4e4209d9e84a0bd9dec18fe7d /ctdb/server/ctdb_monitor.c | |
parent | 569001afd058254b90e58b8e61fb01105d05acf7 (diff) | |
download | samba-698a0e4e9a41ec1a4e3c26d61221b7921d993e28.tar.gz samba-698a0e4e9a41ec1a4e3c26d61221b7921d993e28.tar.xz samba-698a0e4e9a41ec1a4e3c26d61221b7921d993e28.zip |
When starting up ctdbd, wait until all initial recoveries have finished
and until we have gone through a full re-recovery timeout without triggering
any pending recoveries before we start up the services and start monitoring
the node.
(This used to be ctdb commit 821333afb458358f90446062b0242790695e5060)
Diffstat (limited to 'ctdb/server/ctdb_monitor.c')
-rw-r--r-- | ctdb/server/ctdb_monitor.c | 52 |
1 files changed, 47 insertions, 5 deletions
diff --git a/ctdb/server/ctdb_monitor.c b/ctdb/server/ctdb_monitor.c
index 437b9d894df..f4223772b6c 100644
--- a/ctdb/server/ctdb_monitor.c
+++ b/ctdb/server/ctdb_monitor.c
@@ -205,6 +205,51 @@ static void ctdb_startup_callback(struct ctdb_context *ctdb, int status, void *p
 
 
 /*
+  wait until we have finished initial recoveries before we start the
+  monitoring events
+ */
+static void ctdb_wait_until_recovered(struct event_context *ev, struct timed_event *te,
+			      struct timeval t, void *private_data)
+{
+	struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
+
+	DEBUG(DEBUG_NOTICE,("CTDB_WAIT_UNTIL_RECOVERED\n"));
+
+	if (ctdb->vnn_map->generation == INVALID_GENERATION) {
+		DEBUG(DEBUG_NOTICE,(__location__ " generation is INVALID. Wait one more second\n"));
+		event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
+				     timeval_current_ofs(1, 0),
+				     ctdb_wait_until_recovered, ctdb);
+		return;
+	}
+
+	if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
+		DEBUG(DEBUG_NOTICE,(__location__ " in recovery. Wait one more second\n"));
+		event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
+				     timeval_current_ofs(1, 0),
+				     ctdb_wait_until_recovered, ctdb);
+		return;
+	}
+
+
+	if (timeval_elapsed(&ctdb->last_recovery_finished) < (ctdb->tunable.rerecovery_timeout + 3)) {
+		DEBUG(DEBUG_NOTICE,(__location__ " wait for pending recoveries to end. Wait one more second.\n"));
+
+		event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
+				     timeval_current_ofs(1, 0),
+				     ctdb_wait_until_recovered, ctdb);
+		return;
+	}
+
+
+	DEBUG(DEBUG_NOTICE,(__location__ " Recoveries finished. Running the \"startup\" event.\n"));
+	event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
+			     timeval_current_ofs(1, 0),
+			     ctdb_check_health, ctdb);
+}
+
+
+/*
   see if the event scripts think we are healthy
  */
 static void ctdb_check_health(struct event_context *ev, struct timed_event *te,
@@ -301,8 +346,6 @@ void ctdb_stop_monitoring(struct ctdb_context *ctdb)
  */
 void ctdb_start_monitoring(struct ctdb_context *ctdb)
 {
-	struct timed_event *te;
-
 	if (ctdb->monitor != NULL) {
 		return;
 	}
@@ -315,10 +358,9 @@ void ctdb_start_monitoring(struct ctdb_context *ctdb)
 	ctdb->monitor->monitor_context = talloc_new(ctdb->monitor);
 	CTDB_NO_MEMORY_FATAL(ctdb, ctdb->monitor->monitor_context);
 
-	te = event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
+	event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
 			     timeval_current_ofs(1, 0),
-			     ctdb_check_health, ctdb);
-	CTDB_NO_MEMORY_FATAL(ctdb, te);
+			     ctdb_wait_until_recovered, ctdb);
 
 	ctdb->monitor->monitoring_mode = CTDB_MONITORING_ACTIVE;
 	DEBUG(DEBUG_NOTICE,("Monitoring has been started\n"));