summary | refs | log | tree | commit | diff | stats
path: root/ctdb/server
diff options
context:
space:
mode:
author Ronnie Sahlberg <ronniesahlberg@gmail.com> 2008-06-13 13:18:06 +1000
committer Ronnie Sahlberg <ronniesahlberg@gmail.com> 2008-06-13 13:18:06 +1000
commit 779468ab3f0dfdda57230fafb51aa4510818780c (patch)
tree d6fac209bd63130b015b0bf4984ef3c897ee0122 /ctdb/server
parent 30535c815dc0cd1503312a76e0b9dcd439aff7c4 (diff)
download samba-779468ab3f0dfdda57230fafb51aa4510818780c.tar.gz
samba-779468ab3f0dfdda57230fafb51aa4510818780c.tar.xz
samba-779468ab3f0dfdda57230fafb51aa4510818780c.zip
If the event scripts hang EventScriptBanCount times in a row,
the node will ban itself for the default recovery ban period. (This used to be ctdb commit 7239d7ecd54037b11eddf47328a3129d281e7d4a)
Diffstat (limited to 'ctdb/server')
-rw-r--r-- ctdb/server/ctdb_tunables.c 1
-rw-r--r-- ctdb/server/eventscript.c 33
2 files changed, 32 insertions, 2 deletions
diff --git a/ctdb/server/ctdb_tunables.c b/ctdb/server/ctdb_tunables.c
index 9518b2233c..d138137afd 100644
--- a/ctdb/server/ctdb_tunables.c
+++ b/ctdb/server/ctdb_tunables.c
@@ -38,6 +38,7 @@ static const struct {
{ "MonitorInterval", 15, offsetof(struct ctdb_tunable, monitor_interval) },
{ "TickleUpdateInterval",20, offsetof(struct ctdb_tunable, tickle_update_interval) },
{ "EventScriptTimeout", 20, offsetof(struct ctdb_tunable, script_timeout) },
+ { "EventScriptBanCount", 5, offsetof(struct ctdb_tunable, script_ban_count) },
{ "RecoveryGracePeriod", 60, offsetof(struct ctdb_tunable, recovery_grace_period) },
{ "RecoveryBanPeriod", 300, offsetof(struct ctdb_tunable, recovery_ban_period) },
{ "DatabaseHashSize", 10000, offsetof(struct ctdb_tunable, database_hash_size) },
diff --git a/ctdb/server/eventscript.c b/ctdb/server/eventscript.c
index ff26dd76e1..0e4af037c5 100644
--- a/ctdb/server/eventscript.c
+++ b/ctdb/server/eventscript.c
@@ -222,6 +222,27 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event
talloc_set_destructor(state, NULL);
talloc_free(state);
callback(ctdb, status, private_data);
+
+ ctdb->event_script_timeouts = 0;
+}
+
+static void ctdb_ban_self(struct ctdb_context *ctdb, uint32_t ban_period)
+{
+ int ret;
+ struct ctdb_ban_info b;
+ TDB_DATA data;
+
+ b.pnn = ctdb->pnn;
+ b.ban_time = ban_period;
+
+ data.dptr = (uint8_t *)&b;
+ data.dsize = sizeof(b);
+
+ ret = ctdb_daemon_send_message(ctdb, CTDB_BROADCAST_CONNECTED,
+ CTDB_SRVID_BAN_NODE, data);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR,(__location__ " Failed to send ban message\n"));
+ }
}
@@ -234,9 +255,17 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
void *private_data = state->private_data;
struct ctdb_context *ctdb = state->ctdb;
- DEBUG(DEBUG_ERR,("event script timed out : %s\n", state->options));
+ DEBUG(DEBUG_ERR,("Event script timed out : %s count : %u\n", state->options, ctdb->event_script_timeouts));
+
talloc_free(state);
callback(ctdb, -1, private_data);
+
+ ctdb->event_script_timeouts++;
+ if (ctdb->event_script_timeouts > ctdb->tunable.script_ban_count) {
+ ctdb->event_script_timeouts = 0;
+ DEBUG(DEBUG_ERR, ("Maximum timeout count reached for eventscript. Banning self for %d seconds\n", ctdb->tunable.recovery_ban_period));
+ ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
+ }
}
/*
@@ -308,7 +337,7 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
if (!timeval_is_zero(&timeout)) {
event_add_timed(ctdb->ev, state, timeout, ctdb_event_script_timeout, state);
} else {
- DEBUG(DEBUG_ERR, (__location__ " eventscript %s called with no timeout\n", fmt));
+ DEBUG(DEBUG_ERR, (__location__ " eventscript %s called with no timeout\n", state->options));
}
return 0;