diff options
| author | Ronnie sahlberg <ronniesahlberg@gmail.com> | 2010-05-03 15:57:41 +1000 |
|---|---|---|
| committer | Ronnie sahlberg <ronniesahlberg@gmail.com> | 2010-05-03 15:57:41 +1000 |
| commit | 46f00a2478fe73aa16fd41813173171c03b74cb8 (patch) | |
| tree | b2a336d462aaa448b9d3cfb960faf930c7dcc6c0 | |
| parent | 62742bd3375a9535d4b58f24337e9b228e259726 (diff) | |
| parent | 6da848f31ca301a1439053ad9d58b19d2ea3f6b4 (diff) | |
Merge commit 'rusty/signal-fix'
(This used to be ctdb commit 221a9bb41c3a7af0cc65cda78365010893ca1430)
| -rw-r--r-- | ctdb/common/ctdb_util.c | 12 | ||||
| -rw-r--r-- | ctdb/include/ctdb_private.h | 1 | ||||
| -rw-r--r-- | ctdb/server/eventscript.c | 99 |
3 files changed, 59 insertions, 53 deletions
diff --git a/ctdb/common/ctdb_util.c b/ctdb/common/ctdb_util.c index 63abc02437..433a2ad857 100644 --- a/ctdb/common/ctdb_util.c +++ b/ctdb/common/ctdb_util.c @@ -339,6 +339,18 @@ void ctdb_high_priority(struct ctdb_context *ctdb) } } +/* + make ourselves slightly nicer: eg. a ctdb child. + */ +void ctdb_reduce_priority(struct ctdb_context *ctdb) +{ + errno = 0; + if (nice(10) == -1 && errno != 0) { + DEBUG(DEBUG_WARNING,("Unable to lower priority: %s\n", + strerror(errno))); + } +} + void set_nonblocking(int fd) { unsigned v; diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index d4c343b498..cb7c165f5e 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -1263,6 +1263,7 @@ void ctdb_node_dead(struct ctdb_node *node); void ctdb_node_connected(struct ctdb_node *node); bool ctdb_blocking_freeze(struct ctdb_context *ctdb); void ctdb_high_priority(struct ctdb_context *ctdb); +void ctdb_reduce_priority(struct ctdb_context *ctdb); int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, struct ctdb_req_control *c, TDB_DATA indata, diff --git a/ctdb/server/eventscript.c b/ctdb/server/eventscript.c index 740955182e..8011269e26 100644 --- a/ctdb/server/eventscript.c +++ b/ctdb/server/eventscript.c @@ -27,44 +27,13 @@ #include "lib/events/events.h" #include "../common/rb_tree.h" -static struct { - struct timeval start; - const char *script_running; -} child_state; - static void ctdb_event_script_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *p); /* - ctdbd sends us a SIGTERM when we should time out the current script + ctdbd sends us a SIGTERM when we should die. 
*/ static void sigterm(int sig) { - char tbuf[100], buf[200]; - time_t t; - - DEBUG(DEBUG_ERR,("Timed out running script '%s' after %.1f seconds pid :%d\n", - child_state.script_running, timeval_elapsed(&child_state.start), getpid())); - - t = time(NULL); - - strftime(tbuf, sizeof(tbuf)-1, "%Y%m%d%H%M%S", localtime(&t)); - sprintf(buf, "{ pstree -p; cat /proc/locks; ls -li /var/ctdb/ /var/ctdb/persistent; }" - " >/tmp/ctdb.event.%s.%d", tbuf, getpid()); - system(buf); - - DEBUG(DEBUG_ERR,("Logged timedout eventscript : %s\n", buf)); - - /* all the child processes will be running in the same process group */ - kill(-getpgrp(), SIGKILL); - _exit(1); -} - -/* - ctdbd sends us a SIGABRT when we should abort the current script. - we abort any active monitor script any time a different event is generated. - */ -static void sigabrt(int sig) -{ /* all the child processes will be running in the same process group */ kill(-getpgrp(), SIGKILL); _exit(1); @@ -78,7 +47,6 @@ struct ctdb_event_script_state { int fd[2]; void *private_data; bool from_user; - bool aborted; enum ctdb_eventscript_call call; const char *options; struct timeval timeout; @@ -291,7 +259,6 @@ static int child_setup(struct ctdb_context *ctdb) } signal(SIGTERM, sigterm); - signal(SIGABRT, sigabrt); return 0; } @@ -368,7 +335,6 @@ static int child_run_script(struct ctdb_context *ctdb, int ret; TALLOC_CTX *tmp_ctx = talloc_new(ctdb); - child_state.start = timeval_current(); ret = child_setup(ctdb); if (ret != 0) goto out; @@ -376,7 +342,6 @@ static int child_run_script(struct ctdb_context *ctdb, cmdstr = child_command_string(ctdb, tmp_ctx, from_user, current->name, call, options); CTDB_NO_MEMORY(ctdb, cmdstr); - child_state.script_running = cmdstr; DEBUG(DEBUG_DEBUG,("Executing event script %s\n",cmdstr)); @@ -518,6 +483,46 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event } } +static void debug_timeout(struct ctdb_event_script_state *state) +{ + struct ctdb_script_wire 
*current = get_current_script(state); + char *cmd; + pid_t pid; + time_t t; + char tbuf[100], buf[200]; + + cmd = child_command_string(state->ctdb, state, + state->from_user, current->name, + state->call, state->options); + CTDB_NO_MEMORY_VOID(state->ctdb, cmd); + + DEBUG(DEBUG_ERR,("Timed out running script '%s' after %.1f seconds pid :%d\n", + cmd, timeval_elapsed(&current->start), state->child)); + talloc_free(cmd); + + t = time(NULL); + strftime(tbuf, sizeof(tbuf)-1, "%Y%m%d%H%M%S", localtime(&t)); + sprintf(buf, "{ pstree -p; cat /proc/locks; ls -li /var/ctdb/ /var/ctdb/persistent; }" + " >/tmp/ctdb.event.%s.%d", tbuf, getpid()); + + pid = fork(); + if (pid == 0) { + ctdb_reduce_priority(state->ctdb); + system(buf); + /* Now we can kill the child */ + kill(state->child, SIGTERM); + exit(0); + } + if (pid == -1) { + DEBUG(DEBUG_ERR,("Fork for debug script failed : %s\n", + strerror(errno))); + } else { + DEBUG(DEBUG_ERR,("Logged timedout eventscript : %s\n", buf)); + /* Don't kill child until timeout done.
*/ + state->child = 0; + } +} + /* called when child times out */ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *p) @@ -543,11 +548,7 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve break; default: state->scripts->scripts[state->current].status = -ETIME; - } - - if (kill(state->child, 0) != 0) { - DEBUG(DEBUG_ERR,("Event script child process already dead, errno %s(%d)\n", strerror(errno), errno)); - state->child = 0; + debug_timeout(state); } talloc_free(state); @@ -561,17 +562,10 @@ static int event_script_destructor(struct ctdb_event_script_state *state) int status; if (state->child) { - if (state->aborted != True) { - DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child)); + DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child)); - if (kill(state->child, SIGTERM) != 0) { - DEBUG(DEBUG_ERR,("Failed to kill child process for eventscript, errno %s(%d)\n", strerror(errno), errno)); - } - } else { - DEBUG(DEBUG_INFO,(__location__ " Sending SIGABRT to script child pid:%d\n", state->child)); - if (kill(state->child, SIGABRT) != 0) { - DEBUG(DEBUG_ERR,("Failed to kill child process for eventscript, errno %s(%d)\n", strerror(errno), errno)); - } + if (kill(state->child, SIGTERM) != 0) { + DEBUG(DEBUG_ERR,("Failed to kill child process for eventscript, errno %s(%d)\n", strerror(errno), errno)); } } @@ -668,7 +662,6 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb, state->callback = callback; state->private_data = private_data; state->from_user = from_user; - state->aborted = False; state->call = call; state->options = talloc_vasprintf(state, fmt, ap); state->timeout = timeval_set(ctdb->tunable.script_timeout, 0); @@ -711,7 +704,6 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb, /* Kill off any running monitor events to run this event. 
*/ if (ctdb->current_monitor) { /* Discard script status so we don't save to last_status */ - ctdb->current_monitor->aborted = True; talloc_free(ctdb->current_monitor->scripts); ctdb->current_monitor->scripts = NULL; talloc_free(ctdb->current_monitor); @@ -729,6 +721,7 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb, return -1; } state->current = 0; + state->child = 0; if (!from_user && (call == CTDB_EVENT_MONITOR || call == CTDB_EVENT_STATUS)) { ctdb->current_monitor = state; |
