diff options
author | Andrew Tridgell <tridge@samba.org> | 2007-11-12 10:53:11 +1100 |
---|---|---|
committer | Andrew Tridgell <tridge@samba.org> | 2007-11-12 10:53:11 +1100 |
commit | bde886988bf51d00ee779d91017591353e072320 (patch) | |
tree | 5a133fac42ca5ccadd458820c1f4a663bb2a69ad /ctdb | |
parent | 82bd65274977f86bf5bea87a077ef1180a6109c9 (diff) | |
download | samba-bde886988bf51d00ee779d91017591353e072320.tar.gz samba-bde886988bf51d00ee779d91017591353e072320.tar.xz samba-bde886988bf51d00ee779d91017591353e072320.zip |
Prevent a deadly embrace (deadlock) between smbd and ctdbd by moving the
call to the startup event scripts to after the point where recovery has
started and the node is in normal operation.
This makes the 'startup' script just a special type of 'monitor'
script: the one that is called first.
(This used to be ctdb commit 7424c30a5fd04aea0137c466b4318c3f185280d8)
Diffstat (limited to 'ctdb')
-rw-r--r-- | ctdb/include/ctdb_private.h | 1 | ||||
-rw-r--r-- | ctdb/server/ctdb_daemon.c | 15 | ||||
-rw-r--r-- | ctdb/server/ctdb_monitor.c | 42 |
3 files changed, 42 insertions, 16 deletions
diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index 7b98683e9f..cb76bb0074 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -366,6 +366,7 @@ struct ctdb_context { const char *event_script_dir; const char *default_public_interface; pid_t recoverd_pid; + bool done_startup; }; struct ctdb_db_context { diff --git a/ctdb/server/ctdb_daemon.c b/ctdb/server/ctdb_daemon.c index 671a7e8be2..8f66ade927 100644 --- a/ctdb/server/ctdb_daemon.c +++ b/ctdb/server/ctdb_daemon.c @@ -68,13 +68,8 @@ static void print_exit_message(void) /* called when the "startup" event script has finished */ -static void ctdb_start_transport(struct ctdb_context *ctdb, int status, void *p) +static void ctdb_start_transport(struct ctdb_context *ctdb) { - if (status != 0) { - DEBUG(0,("startup event failed!\n")); - ctdb_fatal(ctdb, "startup event script failed"); - } - /* start the transport running */ if (ctdb->methods->start(ctdb) != 0) { DEBUG(0,("transport failed to start!\n")); @@ -664,12 +659,8 @@ int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork) /* release any IPs we hold from previous runs of the daemon */ ctdb_release_all_ips(ctdb); - ret = ctdb_event_script_callback(ctdb, timeval_zero(), ctdb, - ctdb_start_transport, NULL, "startup"); - if (ret != 0) { - DEBUG(0,("Failed startup event script\n")); - return -1; - } + /* start the transport going */ + ctdb_start_transport(ctdb); /* go into a wait loop to allow other nodes to complete */ event_loop_wait(ctdb->ev); diff --git a/ctdb/server/ctdb_monitor.c b/ctdb/server/ctdb_monitor.c index c96099e76c..52ecc7c713 100644 --- a/ctdb/server/ctdb_monitor.c +++ b/ctdb/server/ctdb_monitor.c @@ -138,6 +138,31 @@ static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p) /* + called when the startup event script finishes + */ +static void ctdb_startup_callback(struct ctdb_context *ctdb, int status, void *p) +{ + if (status != 0) { + DEBUG(0,("startup event failed\n")); 
+ } else if (status == 0) { + DEBUG(0,("startup event OK - enabling monitoring\n")); + ctdb->done_startup = true; + } + + if (ctdb->done_startup) { + event_add_timed(ctdb->ev, ctdb->monitor_context, + timeval_zero(), + ctdb_check_health, ctdb); + } else { + event_add_timed(ctdb->ev, ctdb->monitor_context, + timeval_current_ofs(ctdb->tunable.monitor_interval, 0), + ctdb_check_health, ctdb); + } + +} + + +/* see if the event scripts think we are healthy */ static void ctdb_check_health(struct event_context *ev, struct timed_event *te, @@ -146,16 +171,25 @@ static void ctdb_check_health(struct event_context *ev, struct timed_event *te, struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context); int ret; - if (ctdb->monitoring_mode == CTDB_MONITORING_DISABLED) { + if (ctdb->monitoring_mode == CTDB_MONITORING_DISABLED && ctdb->done_startup) { event_add_timed(ctdb->ev, ctdb->monitor_context, timeval_current_ofs(ctdb->tunable.monitor_interval, 0), ctdb_check_health, ctdb); return; } - ret = ctdb_event_script_callback(ctdb, - timeval_current_ofs(ctdb->tunable.script_timeout, 0), - ctdb->monitor_context, ctdb_health_callback, ctdb, "monitor"); + if (!ctdb->done_startup) { + ret = ctdb_event_script_callback(ctdb, + timeval_current_ofs(ctdb->tunable.script_timeout, 0), + ctdb->monitor_context, ctdb_startup_callback, + ctdb, "startup"); + } else { + ret = ctdb_event_script_callback(ctdb, + timeval_current_ofs(ctdb->tunable.script_timeout, 0), + ctdb->monitor_context, ctdb_health_callback, + ctdb, "monitor"); + } + if (ret != 0) { DEBUG(0,("Unable to launch monitor event script\n")); event_add_timed(ctdb->ev, ctdb->monitor_context, |