summary | refs | log | tree | commit | diff | stats
path: root/ctdb
diff options
context:
space:
mode:
author Andrew Tridgell <tridge@samba.org> 2007-11-12 10:53:11 +1100
committer Andrew Tridgell <tridge@samba.org> 2007-11-12 10:53:11 +1100
commit bde886988bf51d00ee779d91017591353e072320 (patch)
tree 5a133fac42ca5ccadd458820c1f4a663bb2a69ad /ctdb
parent 82bd65274977f86bf5bea87a077ef1180a6109c9 (diff)
download samba-bde886988bf51d00ee779d91017591353e072320.tar.gz
samba-bde886988bf51d00ee779d91017591353e072320.tar.xz
samba-bde886988bf51d00ee779d91017591353e072320.zip
prevent a deadly embrace between smbd and ctdbd by moving the calling
of the startup event scripts after the point where recovery has started and the node is in normal operation. This makes the 'startup' script just a special type of the 'monitor' script which is called first. (This used to be ctdb commit 7424c30a5fd04aea0137c466b4318c3f185280d8)
Diffstat (limited to 'ctdb')
-rw-r--r-- ctdb/include/ctdb_private.h 1
-rw-r--r-- ctdb/server/ctdb_daemon.c 15
-rw-r--r-- ctdb/server/ctdb_monitor.c 42
3 files changed, 42 insertions, 16 deletions
diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h
index 7b98683e9f..cb76bb0074 100644
--- a/ctdb/include/ctdb_private.h
+++ b/ctdb/include/ctdb_private.h
@@ -366,6 +366,7 @@ struct ctdb_context {
const char *event_script_dir;
const char *default_public_interface;
pid_t recoverd_pid;
+ bool done_startup;
};
struct ctdb_db_context {
diff --git a/ctdb/server/ctdb_daemon.c b/ctdb/server/ctdb_daemon.c
index 671a7e8be2..8f66ade927 100644
--- a/ctdb/server/ctdb_daemon.c
+++ b/ctdb/server/ctdb_daemon.c
@@ -68,13 +68,8 @@ static void print_exit_message(void)
/* called when the "startup" event script has finished */
-static void ctdb_start_transport(struct ctdb_context *ctdb, int status, void *p)
+static void ctdb_start_transport(struct ctdb_context *ctdb)
{
- if (status != 0) {
- DEBUG(0,("startup event failed!\n"));
- ctdb_fatal(ctdb, "startup event script failed");
- }
-
/* start the transport running */
if (ctdb->methods->start(ctdb) != 0) {
DEBUG(0,("transport failed to start!\n"));
@@ -664,12 +659,8 @@ int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork)
/* release any IPs we hold from previous runs of the daemon */
ctdb_release_all_ips(ctdb);
- ret = ctdb_event_script_callback(ctdb, timeval_zero(), ctdb,
- ctdb_start_transport, NULL, "startup");
- if (ret != 0) {
- DEBUG(0,("Failed startup event script\n"));
- return -1;
- }
+ /* start the transport going */
+ ctdb_start_transport(ctdb);
/* go into a wait loop to allow other nodes to complete */
event_loop_wait(ctdb->ev);
diff --git a/ctdb/server/ctdb_monitor.c b/ctdb/server/ctdb_monitor.c
index c96099e76c..52ecc7c713 100644
--- a/ctdb/server/ctdb_monitor.c
+++ b/ctdb/server/ctdb_monitor.c
@@ -138,6 +138,31 @@ static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
/*
+ called when the startup event script finishes
+ */
+static void ctdb_startup_callback(struct ctdb_context *ctdb, int status, void *p)
+{
+ if (status != 0) {
+ DEBUG(0,("startup event failed\n"));
+ } else if (status == 0) {
+ DEBUG(0,("startup event OK - enabling monitoring\n"));
+ ctdb->done_startup = true;
+ }
+
+ if (ctdb->done_startup) {
+ event_add_timed(ctdb->ev, ctdb->monitor_context,
+ timeval_zero(),
+ ctdb_check_health, ctdb);
+ } else {
+ event_add_timed(ctdb->ev, ctdb->monitor_context,
+ timeval_current_ofs(ctdb->tunable.monitor_interval, 0),
+ ctdb_check_health, ctdb);
+ }
+
+}
+
+
+/*
see if the event scripts think we are healthy
*/
static void ctdb_check_health(struct event_context *ev, struct timed_event *te,
@@ -146,16 +171,25 @@ static void ctdb_check_health(struct event_context *ev, struct timed_event *te,
struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
int ret;
- if (ctdb->monitoring_mode == CTDB_MONITORING_DISABLED) {
+ if (ctdb->monitoring_mode == CTDB_MONITORING_DISABLED && ctdb->done_startup) {
event_add_timed(ctdb->ev, ctdb->monitor_context,
timeval_current_ofs(ctdb->tunable.monitor_interval, 0),
ctdb_check_health, ctdb);
return;
}
- ret = ctdb_event_script_callback(ctdb,
- timeval_current_ofs(ctdb->tunable.script_timeout, 0),
- ctdb->monitor_context, ctdb_health_callback, ctdb, "monitor");
+ if (!ctdb->done_startup) {
+ ret = ctdb_event_script_callback(ctdb,
+ timeval_current_ofs(ctdb->tunable.script_timeout, 0),
+ ctdb->monitor_context, ctdb_startup_callback,
+ ctdb, "startup");
+ } else {
+ ret = ctdb_event_script_callback(ctdb,
+ timeval_current_ofs(ctdb->tunable.script_timeout, 0),
+ ctdb->monitor_context, ctdb_health_callback,
+ ctdb, "monitor");
+ }
+
if (ret != 0) {
DEBUG(0,("Unable to launch monitor event script\n"));
event_add_timed(ctdb->ev, ctdb->monitor_context,