summaryrefslogtreecommitdiffstats
path: root/ctdb/server/eventscript.c
diff options
context:
space:
mode:
authorRusty Russell <rusty@rustcorp.com.au>2009-11-24 11:06:53 +1030
committerRusty Russell <rusty@rustcorp.com.au>2009-11-24 11:06:53 +1030
commit0339a83897a3c9e045a18f12fc075faa33cdc1f3 (patch)
treef98864d814b6b4fc2e728933e00b469e33bc8b67 /ctdb/server/eventscript.c
parentb320d434b27827a9e74591d1e4e01b242d978830 (diff)
downloadsamba-0339a83897a3c9e045a18f12fc075faa33cdc1f3.tar.gz
samba-0339a83897a3c9e045a18f12fc075faa33cdc1f3.tar.xz
samba-0339a83897a3c9e045a18f12fc075faa33cdc1f3.zip
eventscript: fix bug in timeouts on forced eventscripts. Again.
In 15bc66ae801b0c69, Ronnie fixed a double-free race. The problem was that ctdb_run_eventscripts() hands a context to ctdb_event_script_callback() to hang its data off, which gets freed in the callback. This particularly hurt in ctdb_event_script_timeout. There's nothing wrong with this, but obviously we should make the callback call last of all. At the time, ctdb_event_script_timeout() carefully extracted everything from the struct ctdb_event_script_state before calling ->callback. This was cleaned up in 64da4402c6ad485f (Ronnie again), and now state was referred to after the callback again. But the same change introduced a direct use-after-free bug which caused an occasional oops. So in our last episode (eda052101728cf92) Volker fixed this, and Michael committed it. But we still have the double free bug which 15bc66ae801b0c69 was supposed to fix! Let's try to fix this in a more permanent way, but always doing the callback from the destructor. This means we need to hold the status, and don't send the KILL signal if ->child is set to 0. Finally, add a comment about freeing ourselves in run_eventscripts_callback and the structure definition. Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (This used to be ctdb commit 20b15de068d042b292725945927ceda1b01d07c0)
Diffstat (limited to 'ctdb/server/eventscript.c')
-rw-r--r--ctdb/server/eventscript.c64
1 files changed, 24 insertions, 40 deletions
diff --git a/ctdb/server/eventscript.c b/ctdb/server/eventscript.c
index 010da12cdf..0830f4a729 100644
--- a/ctdb/server/eventscript.c
+++ b/ctdb/server/eventscript.c
@@ -61,7 +61,9 @@ static void sigterm(int sig)
struct ctdb_event_script_state {
struct ctdb_context *ctdb;
pid_t child;
+ /* Warning: this can free us! */
void (*callback)(struct ctdb_context *, int, void *);
+ int cb_status;
int fd[2];
void *private_data;
const char *options;
@@ -609,21 +611,16 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event
struct ctdb_event_script_state *state =
talloc_get_type(p, struct ctdb_event_script_state);
struct ctdb_context *ctdb = state->ctdb;
- int rt;
- if (read(state->fd[0], &rt, sizeof(rt)) != sizeof(rt))
- rt = -2;
-
- DEBUG(DEBUG_INFO,(__location__ " Eventscript %s finished with state %d\n", state->options, rt));
-
- if (state->callback) {
- state->callback(ctdb, rt, state->private_data);
- state->callback = NULL;
+ if (read(state->fd[0], &state->cb_status, sizeof(state->cb_status)) !=
+ sizeof(state->cb_status)) {
+ state->cb_status = -2;
}
- ctdb->event_script_timeouts = 0;
+ DEBUG(DEBUG_INFO,(__location__ " Eventscript %s finished with state %d\n", state->options, state->cb_status));
- talloc_set_destructor(state, NULL);
+ state->child = 0;
+ ctdb->event_script_timeouts = 0;
talloc_free(state);
}
@@ -647,18 +644,13 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
struct timeval t, void *p)
{
struct ctdb_event_script_state *state = talloc_get_type(p, struct ctdb_event_script_state);
- void *private_data = state->private_data;
struct ctdb_context *ctdb = state->ctdb;
DEBUG(DEBUG_ERR,("Event script timed out : %s count : %u pid : %d\n", state->options, ctdb->event_script_timeouts, state->child));
if (kill(state->child, 0) != 0) {
DEBUG(DEBUG_ERR,("Event script child process already dead, errno %s(%d)\n", strerror(errno), errno));
- if (state->callback) {
- state->callback(ctdb, 0, private_data);
- state->callback = NULL;
- }
- talloc_set_destructor(state, NULL);
+ state->child = 0;
talloc_free(state);
return;
}
@@ -674,22 +666,13 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
if (ctdb->event_script_timeouts > ctdb->tunable.script_ban_count) {
DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Making node unhealthy\n", ctdb->tunable.script_ban_count));
- if (state->callback) {
- state->callback(ctdb, -ETIME, private_data);
- state->callback = NULL;
- }
+ state->cb_status = -ETIME;
} else {
- if (state->callback) {
- state->callback(ctdb, 0, private_data);
- state->callback = NULL;
- }
+ state->cb_status = 0;
}
} else if (!strcmp(state->options, "startup")) {
DEBUG(DEBUG_ERR, (__location__ " eventscript for startup event timedout.\n"));
- if (state->callback) {
- state->callback(ctdb, -1, private_data);
- state->callback = NULL;
- }
+ state->cb_status = -1;
} else {
/* if it is not a monitor or a startup event we ban ourself
immediately
@@ -698,10 +681,7 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
- if (state->callback) {
- state->callback(ctdb, -1, private_data);
- state->callback = NULL;
- }
+ state->cb_status = -1;
}
if (!strcmp(state->options, "monitor") || !strcmp(state->options, "status")) {
@@ -729,19 +709,22 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
}
/*
- destroy a running event script
+ destroy an event script: kill it if ->child != 0.
*/
static int event_script_destructor(struct ctdb_event_script_state *state)
{
- DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child));
+ if (state->child) {
+ DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child));
- if (state->callback) {
- state->callback(state->ctdb, 0, state->private_data);
- state->callback = NULL;
+ if (kill(state->child, SIGTERM) != 0) {
+ DEBUG(DEBUG_ERR,("Failed to kill child process for eventscript, errno %s(%d)\n", strerror(errno), errno));
+ }
}
- if (kill(state->child, SIGTERM) != 0) {
- DEBUG(DEBUG_ERR,("Failed to kill child process for eventscript, errno %s(%d)\n", strerror(errno), errno));
+ /* This is allowed to free us; talloc will prevent double free anyway,
+ * but beware if you call this outside the destructor! */
+ if (state->callback) {
+ state->callback(state->ctdb, state->cb_status, state->private_data);
}
return 0;
@@ -944,6 +927,7 @@ static void run_eventscripts_callback(struct ctdb_context *ctdb, int status,
}
ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
+ /* This will free the struct ctdb_event_script_state we are in! */
talloc_free(state);
return;
}