From c99f085747aabafc4a440b5bfd1d9a6bea995620 Mon Sep 17 00:00:00 2001 From: Stephen Gallagher Date: Wed, 1 Dec 2010 12:07:28 -0500 Subject: Resend SIGTERM if child doesn't terminate There is a race condition where if we send a SIGTERM before the kernel has scheduled the child, it may be lost, and the child will not terminate and will leave the monitor hung in wait(). This patch alters this behavior so that we will send the SIGTERM again every 10ms and check the wait() in a nonblocking manner. --- src/monitor/monitor.c | 64 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 24 deletions(-) (limited to 'src/monitor') diff --git a/src/monitor/monitor.c b/src/monitor/monitor.c index 640d94adc..6eab66fc5 100644 --- a/src/monitor/monitor.c +++ b/src/monitor/monitor.c @@ -1176,6 +1176,8 @@ static void monitor_quit(struct tevent_context *ev, pid_t pid; int status; errno_t error; + int kret; + bool killed; DEBUG(8, ("Received shutdown command\n")); @@ -1189,35 +1191,49 @@ static void monitor_quit(struct tevent_context *ev, continue; } - DEBUG(1, ("Terminating [%s]\n", svc->name)); - kill(svc->pid, SIGTERM); - + killed = false; + DEBUG(1, ("Terminating [%s][%d]\n", svc->name, svc->pid)); do { errno = 0; - pid = waitpid(svc->pid, &status, 0); - if (pid == -1) { - /* An error occurred while waiting */ + kret = kill(svc->pid, SIGTERM); + if (kret < 0) { error = errno; - if (error != EINTR) { - DEBUG(0, ("[%d][%s] while waiting for [%s]\n", - error, strerror(error), svc->name)); - /* Forcibly kill this child */ - kill(svc->pid, SIGKILL); - break; - } - } else { - error = 0; - if WIFEXITED(status) { - DEBUG(1, ("Child [%s] exited gracefully\n", svc->name)); - } else if WIFSIGNALED(status) { - DEBUG(1, ("Child [%s] terminated with a signal\n", svc->name)); - } else { - DEBUG(0, ("Child [%s] did not exit cleanly\n", svc->name)); - /* Forcibly kill this child */ - kill(svc->pid, SIGKILL); + DEBUG(1, ("Couldn't kill [%s][%d]: [%s]\n", + svc->name, svc->pid, strerror(error))); + } + + do { + errno = 0; + pid = waitpid(svc->pid, &status, WNOHANG); + if (pid == -1) { + /* An error occurred while waiting */ + error = errno; + if (error != EINTR) { + DEBUG(0, ("[%d][%s] while waiting for [%s]\n", + error, strerror(error), svc->name)); + /* Forcibly kill this child */ + kill(svc->pid, SIGKILL); + break; + } + } else if (pid != 0) { + error = 0; + if WIFEXITED(status) { + DEBUG(1, ("Child [%s] exited gracefully\n", svc->name)); + } else if WIFSIGNALED(status) { + DEBUG(1, ("Child [%s] terminated with a signal\n", svc->name)); + } else { + DEBUG(0, ("Child [%s] did not exit cleanly\n", svc->name)); + /* Forcibly kill this child */ + kill(svc->pid, SIGKILL); + } + killed = true; } + } while (error == EINTR); + if (!killed) { + /* Sleep 10ms and try again */ + usleep(10000); } - } while (error == EINTR); + } while (!killed); } #if HAVE_GETPGRP -- cgit