summaryrefslogtreecommitdiffstats
path: root/ctdb/server/eventscript.c
diff options
context:
space:
mode:
authorRusty Russell <rusty@rustcorp.com.au>2009-11-24 11:06:53 +1030
committerRusty Russell <rusty@rustcorp.com.au>2009-11-24 11:06:53 +1030
commitab675516cc568473bda357488e6b0b85f33698a1 (patch)
tree89d561db7631f3c454ea2851b95e4321ef91a325 /ctdb/server/eventscript.c
parent8723045c614d8e1d85048f8cd0108efeb44bc695 (diff)
downloadsamba-ab675516cc568473bda357488e6b0b85f33698a1.tar.gz
samba-ab675516cc568473bda357488e6b0b85f33698a1.tar.xz
samba-ab675516cc568473bda357488e6b0b85f33698a1.zip
eventscript: fix bug in timeouts on forced eventscripts. Again.
In 15bc66ae801b0c69, Ronnie fixed a double-free race. The problem was that ctdb_run_eventscripts() hands a context to ctdb_event_script_callback() to hang its data off, which gets freed in the callback. This particularly hurt in ctdb_event_script_timeout. There's nothing wrong with this, but obviously we should make the callback call last of all. At the time, ctdb_event_script_timeout() carefully extracted everything from the struct ctdb_event_script_state before calling ->callback. This was cleaned up in 64da4402c6ad485f (Ronnie again), and now state was referred to after the callback again. But the same change introduced a direct use-after-free bug which caused an occasional oops. So in our last episode (eda052101728cf92) Volker fixed this, and Michael committed it. But we still have the double free bug which 15bc66ae801b0c69 was supposed to fix! Let's try to fix this in a more permanent way, but always doing the callback from the destructor. This means we need to hold the status, and don't send the KILL signal if ->child is set to 0. Finally, add a comment about freeing ourselves in run_eventscripts_callback and the structure definition. Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (This used to be ctdb commit b90bdb07c1f6913ddbf11bde9684bdc8af61c549)
Diffstat (limited to 'ctdb/server/eventscript.c')
-rw-r--r--ctdb/server/eventscript.c64
1 files changed, 24 insertions, 40 deletions
diff --git a/ctdb/server/eventscript.c b/ctdb/server/eventscript.c
index 30e062f1cf..854acd655e 100644
--- a/ctdb/server/eventscript.c
+++ b/ctdb/server/eventscript.c
@@ -61,7 +61,9 @@ static void sigterm(int sig)
struct ctdb_event_script_state {
struct ctdb_context *ctdb;
pid_t child;
+ /* Warning: this can free us! */
void (*callback)(struct ctdb_context *, int, void *);
+ int cb_status;
int fd[2];
void *private_data;
const char *options;
@@ -609,21 +611,16 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event
struct ctdb_event_script_state *state =
talloc_get_type(p, struct ctdb_event_script_state);
struct ctdb_context *ctdb = state->ctdb;
- int rt;
- if (read(state->fd[0], &rt, sizeof(rt)) != sizeof(rt))
- rt = -2;
-
- DEBUG(DEBUG_INFO,(__location__ " Eventscript %s finished with state %d\n", state->options, rt));
-
- if (state->callback) {
- state->callback(ctdb, rt, state->private_data);
- state->callback = NULL;
+ if (read(state->fd[0], &state->cb_status, sizeof(state->cb_status)) !=
+ sizeof(state->cb_status)) {
+ state->cb_status = -2;
}
- ctdb->event_script_timeouts = 0;
+ DEBUG(DEBUG_INFO,(__location__ " Eventscript %s finished with state %d\n", state->options, state->cb_status));
- talloc_set_destructor(state, NULL);
+ state->child = 0;
+ ctdb->event_script_timeouts = 0;
talloc_free(state);
}
@@ -647,18 +644,13 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
struct timeval t, void *p)
{
struct ctdb_event_script_state *state = talloc_get_type(p, struct ctdb_event_script_state);
- void *private_data = state->private_data;
struct ctdb_context *ctdb = state->ctdb;
DEBUG(DEBUG_ERR,("Event script timed out : %s count : %u pid : %d\n", state->options, ctdb->event_script_timeouts, state->child));
if (kill(state->child, 0) != 0) {
DEBUG(DEBUG_ERR,("Event script child process already dead, errno %s(%d)\n", strerror(errno), errno));
- if (state->callback) {
- state->callback(ctdb, 0, private_data);
- state->callback = NULL;
- }
- talloc_set_destructor(state, NULL);
+ state->child = 0;
talloc_free(state);
return;
}
@@ -674,22 +666,13 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
if (ctdb->event_script_timeouts > ctdb->tunable.script_ban_count) {
DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Making node unhealthy\n", ctdb->tunable.script_ban_count));
- if (state->callback) {
- state->callback(ctdb, -ETIME, private_data);
- state->callback = NULL;
- }
+ state->cb_status = -ETIME;
} else {
- if (state->callback) {
- state->callback(ctdb, 0, private_data);
- state->callback = NULL;
- }
+ state->cb_status = 0;
}
} else if (!strcmp(state->options, "startup")) {
DEBUG(DEBUG_ERR, (__location__ " eventscript for startup event timedout.\n"));
- if (state->callback) {
- state->callback(ctdb, -1, private_data);
- state->callback = NULL;
- }
+ state->cb_status = -1;
} else {
/* if it is not a monitor or a startup event we ban ourself
immediately
@@ -698,10 +681,7 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
- if (state->callback) {
- state->callback(ctdb, -1, private_data);
- state->callback = NULL;
- }
+ state->cb_status = -1;
}
if (!strcmp(state->options, "monitor") || !strcmp(state->options, "status")) {
@@ -729,19 +709,22 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
}
/*
- destroy a running event script
+ destroy an event script: kill it if ->child != 0.
*/
static int event_script_destructor(struct ctdb_event_script_state *state)
{
- DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child));
+ if (state->child) {
+ DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child));
- if (state->callback) {
- state->callback(state->ctdb, 0, state->private_data);
- state->callback = NULL;
+ if (kill(state->child, SIGTERM) != 0) {
+ DEBUG(DEBUG_ERR,("Failed to kill child process for eventscript, errno %s(%d)\n", strerror(errno), errno));
+ }
}
- if (kill(state->child, SIGTERM) != 0) {
- DEBUG(DEBUG_ERR,("Failed to kill child process for eventscript, errno %s(%d)\n", strerror(errno), errno));
+ /* This is allowed to free us; talloc will prevent double free anyway,
+ * but beware if you call this outside the destructor! */
+ if (state->callback) {
+ state->callback(state->ctdb, state->cb_status, state->private_data);
}
return 0;
@@ -943,6 +926,7 @@ static void run_eventscripts_callback(struct ctdb_context *ctdb, int status,
}
ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
+ /* This will free the struct ctdb_event_script_state we are in! */
talloc_free(state);
return;
}