summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/monitor/monitor.c72
1 files changed, 59 insertions, 13 deletions
diff --git a/src/monitor/monitor.c b/src/monitor/monitor.c
index 0db3cea8d..8098a5830 100644
--- a/src/monitor/monitor.c
+++ b/src/monitor/monitor.c
@@ -58,6 +58,19 @@
* monitor will get crazy hammering children with messages */
#define MONITOR_DEF_PING_TIME 10
+/* TODO: get the restart related values from config */
+#define MONITOR_RESTART_CNT_INTERVAL_RESET 30
+/* maximum allowed number of service restarts if the restarts
+ * were less than MONITOR_RESTART_CNT_INTERVAL_RESET apart, which would
+ * indicate a crash after startup or after every request */
+#define MONITOR_MAX_SVC_RESTARTS 2
+/* The services are restarted with a delay in case the restart was
+ * hitting a race condition where the DP is not ready yet either.
+ * The MONITOR_MAX_RESTART_DELAY defines the maximum delay between
+ * restarts.
+ */
+#define MONITOR_MAX_RESTART_DELAY 4
+
/* Special value to leave the Kerberos Replay Cache set to use
* the libkrb5 defaults
*/
@@ -2238,10 +2251,43 @@ static void service_startup_handler(struct tevent_context *ev,
_exit(1);
}
+static void mt_svc_restart(struct tevent_context *ev,
+ struct tevent_timer *te,
+ struct timeval t, void *ptr)
+{
+ struct mt_svc *svc;
+
+ svc = talloc_get_type(ptr, struct mt_svc);
+ if (svc == NULL) {
+ return;
+ }
+
+ DEBUG(6, ("Scheduling service %s for restart %d\n",
+ svc->name, svc->restarts+1));
+
+ if (svc->type == MT_SVC_SERVICE) {
+ add_new_service(svc->mt_ctx, svc->name, svc->restarts + 1);
+ } else if (svc->type == MT_SVC_PROVIDER) {
+ add_new_provider(svc->mt_ctx, svc->name, svc->restarts + 1);
+ } else {
+ /* Invalid type? */
+ DEBUG(1,
+ ("BUG: Invalid child process type [%d]\n", svc->type));
+ }
+
+ /* Free the old service (which will also remove it
+ * from the child list)
+ */
+ talloc_free(svc);
+}
+
static void mt_svc_exit_handler(int pid, int wait_status, void *pvt)
{
struct mt_svc *svc = talloc_get_type(pvt, struct mt_svc);
time_t now = time(NULL);
+ struct tevent_timer *te;
+ struct timeval tv;
+ int restart_delay;
if WIFEXITED(wait_status) {
DEBUG(2, ("Child [%s] exited with code [%d]\n",
@@ -2260,30 +2306,30 @@ static void mt_svc_exit_handler(int pid, int wait_status, void *pvt)
return;
}
- if ((now - svc->last_restart) > 30) { /* TODO: get val from config */
+ if ((now - svc->last_restart) > MONITOR_RESTART_CNT_INTERVAL_RESET) {
svc->restarts = 0;
}
/* Restart the service */
- if (svc->restarts > 2) { /* TODO: get val from config */
+ if (svc->restarts > MONITOR_MAX_SVC_RESTARTS) {
DEBUG(0, ("Process [%s], definitely stopped!\n", svc->name));
talloc_free(svc);
return;
}
- if (svc->type == MT_SVC_SERVICE) {
- add_new_service(svc->mt_ctx, svc->name, svc->restarts + 1);
- } else if (svc->type == MT_SVC_PROVIDER) {
- add_new_provider(svc->mt_ctx, svc->name, svc->restarts + 1);
- } else {
- /* Invalid type? */
- DEBUG(1, ("BUG: Invalid child process type [%d]\n", svc->type));
+ /* restarts are schedule after 0, 2, 4 seconds */
+ restart_delay = svc->restarts << 1;
+ if (restart_delay > MONITOR_MAX_RESTART_DELAY) {
+ restart_delay = MONITOR_MAX_RESTART_DELAY;
}
- /* Free the old service (which will also remove it
- * from the child list)
- */
- talloc_free(svc);
+ tv = tevent_timeval_current_ofs(restart_delay, 0);
+ te = tevent_add_timer(svc->mt_ctx->ev, svc, tv, mt_svc_restart, svc);
+ if (!te) {
+ /* Nothing much we can do */
+ DEBUG(1, ("Out of memory?!\n"));
+ return;
+ }
}
int main(int argc, const char *argv[])