summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/config/SSSDConfig/__init__.py.in 2
-rw-r--r-- src/monitor/monitor.c 231
-rw-r--r-- src/monitor/monitor_iface.xml 2
3 files changed, 8 insertions, 227 deletions
diff --git a/src/config/SSSDConfig/__init__.py.in b/src/config/SSSDConfig/__init__.py.in
index 06127584f..52af1386c 100644
--- a/src/config/SSSDConfig/__init__.py.in
+++ b/src/config/SSSDConfig/__init__.py.in
@@ -45,7 +45,7 @@ option_strings = {
'debug_timestamps' : _('Include timestamps in debug logs'),
'debug_microseconds' : _('Include microseconds in timestamps in debug logs'),
'debug_to_files' : _('Write debug messages to logfiles'),
- 'timeout' : _('Ping timeout before restarting service'),
+ 'timeout' : _('Watchdog timeout before restarting service'),
'force_timeout' : _('Timeout between three failed ping checks and forcibly killing the service'),
'command' : _('Command to start service'),
'reconnection_retries' : _('Number of times to attempt connection to Data Providers'),
diff --git a/src/monitor/monitor.c b/src/monitor/monitor.c
index c71cdc228..b616a3454 100644
--- a/src/monitor/monitor.c
+++ b/src/monitor/monitor.c
@@ -55,9 +55,6 @@
#include <keyutils.h>
#endif
-/* ping time cannot be less then once every few seconds or the
- * monitor will get crazy hammering children with messages */
-#define MONITOR_DEF_PING_TIME 10
/* terminate the child after this interval by default if it
* doesn't shutdown on receiving SIGTERM */
#define MONITOR_DEF_FORCE_TIME 60
@@ -117,7 +114,6 @@ struct mt_svc {
pid_t pid;
char *diag_cmd;
- int ping_time;
int kill_time;
struct tevent_timer *kill_timer;
@@ -126,13 +122,10 @@ struct mt_svc {
int restarts;
time_t last_restart;
- int failed_pongs;
DBusPendingCall *pending;
int debug_level;
- struct tevent_timer *ping_ev;
-
struct sss_child_ctx *child_ctx;
};
@@ -183,11 +176,8 @@ static int start_service(struct mt_svc *mt_svc);
static int monitor_service_init(struct sbus_connection *conn, void *data);
-static int service_send_ping(struct mt_svc *svc);
static int service_signal_reset_offline(struct mt_svc *svc);
-static void ping_check(DBusPendingCall *pending, void *data);
-static void set_tasks_checker(struct mt_svc *srv);
static int monitor_kill_service (struct mt_svc *svc);
static int get_service_config(struct mt_ctx *ctx, const char *name,
@@ -337,7 +327,7 @@ static int svc_destructor(void *mem)
DLIST_REMOVE(svc->mt_ctx->svc_list, svc);
}
- /* Cancel any pending pings */
+ /* Cancel any pending calls */
if (svc->pending) {
dbus_pending_call_cancel(svc->pending);
}
@@ -700,67 +690,6 @@ static int monitor_dbus_init(struct mt_ctx *ctx)
return ret;
}
-static void tasks_check_handler(struct tevent_context *ev,
- struct tevent_timer *te,
- struct timeval t, void *ptr)
-{
- struct mt_svc *svc = talloc_get_type(ptr, struct mt_svc);
- int ret;
-
- ret = service_send_ping(svc);
- switch (ret) {
- case EOK:
- /* all fine */
- break;
-
- case ENXIO:
- DEBUG(SSSDBG_CRIT_FAILURE,
- "Child (%s) not responding! (yet)\n", svc->name);
- break;
-
- default:
- /* TODO: should we tear it down ? */
- DEBUG(SSSDBG_CRIT_FAILURE,
- "Sending a message to service (%s) failed!!\n", svc->name);
- break;
- }
-
- if (svc->failed_pongs >= 3) {
- /* too long since we last heard of this process */
- DEBUG(SSSDBG_CRIT_FAILURE,
- "Killing service [%s], not responding to pings!\n",
- svc->name);
- sss_log(SSS_LOG_ERR,
- "Killing service [%s], not responding to pings!\n",
- svc->name);
-
- /* Kill the service. The SIGCHLD handler will restart it */
- monitor_kill_service(svc);
- return;
- }
-
- /* all fine, set up the task checker again */
- set_tasks_checker(svc);
-}
-
-static void set_tasks_checker(struct mt_svc *svc)
-{
- struct tevent_timer *te = NULL;
- struct timeval tv;
-
- gettimeofday(&tv, NULL);
- tv.tv_sec += svc->ping_time;
- tv.tv_usec = 0;
- te = tevent_add_timer(svc->mt_ctx->ev, svc, tv, tasks_check_handler, svc);
- if (te == NULL) {
- DEBUG(SSSDBG_FATAL_FAILURE,
- "failed to add event, monitor offline for [%s]!\n",
- svc->name);
- /* FIXME: shutdown ? */
- }
- svc->ping_ev = te;
-}
-
static void monitor_restart_service(struct mt_svc *svc);
static void mt_svc_sigkill(struct tevent_context *ev,
struct tevent_timer *te,
@@ -1214,29 +1143,11 @@ static int get_monitor_config(struct mt_ctx *ctx)
return EOK;
}
-static errno_t get_ping_config(struct mt_ctx *ctx, const char *path,
+static errno_t get_kill_config(struct mt_ctx *ctx, const char *path,
struct mt_svc *svc)
{
errno_t ret;
- ret = confdb_get_int(ctx->cdb, path,
- CONFDB_DOMAIN_TIMEOUT,
- MONITOR_DEF_PING_TIME, &svc->ping_time);
- if (ret != EOK) {
- DEBUG(SSSDBG_CRIT_FAILURE,
- "Failed to get ping timeout for '%s'\n", svc->name);
- return ret;
- }
-
- /* 'timeout = 0' should be translated to the default */
- if (svc->ping_time == 0) {
- svc->ping_time = MONITOR_DEF_PING_TIME;
- }
-
- DEBUG(SSSDBG_CONF_SETTINGS,
- "Time between service pings for [%s]: [%d]\n",
- svc->name, svc->ping_time);
-
ret = confdb_get_string(ctx->cdb, svc, path,
CONFDB_MONITOR_PRE_KILL_CMD,
NULL, &svc->diag_cmd);
@@ -1407,10 +1318,10 @@ static int get_service_config(struct mt_ctx *ctx, const char *name,
}
}
- ret = get_ping_config(ctx, path, svc);
+ ret = get_kill_config(ctx, path, svc);
if (ret != EOK) {
DEBUG(SSSDBG_CRIT_FAILURE,
- "Failed to get ping timeouts for %s\n", svc->name);
+ "Failed to get kill timeouts for %s\n", svc->name);
talloc_free(svc);
return ret;
}
@@ -1502,10 +1413,10 @@ static int get_provider_config(struct mt_ctx *ctx, const char *name,
return ret;
}
- ret = get_ping_config(ctx, path, svc);
+ ret = get_kill_config(ctx, path, svc);
if (ret != EOK) {
DEBUG(SSSDBG_CRIT_FAILURE,
- "Failed to get ping timeouts for %s\n", svc->name);
+ "Failed to get kill timeouts for %s\n", svc->name);
talloc_free(svc);
return ret;
}
@@ -2658,134 +2569,6 @@ static int monitor_service_init(struct sbus_connection *conn, void *data)
MON_SRV_PATH, mini);
}
-/* service_send_ping
- * this function send a dbus ping to a service.
- * It returns EOK if all is fine or ENXIO if the connection is
- * not available (either not yet set up or teared down).
- * Returns e generic error in other cases.
- */
-static int service_send_ping(struct mt_svc *svc)
-{
- DBusMessage *msg;
- int ret;
-
- if (!svc->conn) {
- DEBUG(SSSDBG_TRACE_INTERNAL, "Service not yet initialized\n");
- return ENXIO;
- }
-
- DEBUG(SSSDBG_TRACE_INTERNAL, "Pinging %s\n", svc->name);
-
- /*
- * Set up identity request
- * This should be a well-known path and method
- * for all services
- */
- msg = dbus_message_new_method_call(NULL,
- MONITOR_PATH,
- MON_CLI_IFACE,
- MON_CLI_IFACE_PING);
- if (!msg) {
- DEBUG(SSSDBG_FATAL_FAILURE,"Out of memory?!\n");
- talloc_zfree(svc->conn);
- return ENOMEM;
- }
-
- ret = sbus_conn_send(svc->conn, msg,
- svc->ping_time * 1000, /* milliseconds */
- ping_check, svc, &svc->pending);
- dbus_message_unref(msg);
- return ret;
-}
-
-static void ping_check(DBusPendingCall *pending, void *data)
-{
- struct mt_svc *svc;
- DBusMessage *reply;
- const char *dbus_error_name;
- size_t len;
- int type;
-
- svc = talloc_get_type(data, struct mt_svc);
- if (!svc) {
- /* The connection probably went down before the callback fired.
- * Not much we can do. */
- DEBUG(SSSDBG_CRIT_FAILURE, "Invalid service pointer.\n");
- return;
- }
- svc->pending = NULL;
-
- reply = dbus_pending_call_steal_reply(pending);
- if (!reply) {
- /* reply should never be null. This function shouldn't be called
- * until reply is valid or timeout has occurred. If reply is NULL
- * here, something is seriously wrong and we should bail out.
- */
- DEBUG(SSSDBG_FATAL_FAILURE,
- "A reply callback was called but no reply was received"
- " and no timeout occurred\n");
-
- /* Destroy this connection */
- sbus_disconnect(svc->conn);
- goto done;
- }
-
- type = dbus_message_get_type(reply);
- switch (type) {
- case DBUS_MESSAGE_TYPE_METHOD_RETURN:
- /* ok peer replied,
- * make sure we reset the failure counter in the service structure */
-
- DEBUG(SSSDBG_TRACE_INTERNAL,
- "Service %s replied to ping\n", svc->name);
-
- svc->failed_pongs = 0;
- break;
-
- case DBUS_MESSAGE_TYPE_ERROR:
-
- dbus_error_name = dbus_message_get_error_name(reply);
- if (!dbus_error_name) {
- dbus_error_name = "<UNKNOWN>";
- }
-
- len = strlen(DBUS_ERROR_NO_REPLY);
-
- /* Increase failed pong count */
- if (strnlen(dbus_error_name, len + 1) == len
- && strncmp(dbus_error_name, DBUS_ERROR_NO_REPLY, len) == 0) {
- DEBUG(SSSDBG_CRIT_FAILURE,
- "A service PING timed out on [%s]. "
- "Attempt [%d]\n",
- svc->name, svc->failed_pongs);
- svc->failed_pongs++;
-
- if (debug_level & SSSDBG_TRACE_LIBS) {
- svc_run_diag_cmd(svc);
- }
- break;
- }
-
- DEBUG(SSSDBG_FATAL_FAILURE,
- "A service PING returned an error [%s], closing connection.\n",
- dbus_error_name);
- /* Falling through to default intentionally*/
- default:
- /*
- * Timeout or other error occurred or something
- * unexpected happened.
- * It doesn't matter which, because either way we
- * know that this connection isn't trustworthy.
- * We'll destroy it now.
- */
- sbus_disconnect(svc->conn);
- }
-
-done:
- dbus_pending_call_unref(pending);
- dbus_message_unref(reply);
-}
-
static void service_startup_handler(struct tevent_context *ev,
struct tevent_timer *te,
struct timeval t, void *ptr);
@@ -2840,7 +2623,6 @@ static void service_startup_handler(struct tevent_context *ev,
/* Parent */
mt_svc->mt_ctx->check_children = true;
- mt_svc->failed_pongs = 0;
/* Handle process exit */
ret = sss_child_register(mt_svc,
@@ -2858,7 +2640,6 @@ static void service_startup_handler(struct tevent_context *ev,
}
DLIST_ADD(mt_svc->mt_ctx->svc_list, mt_svc);
- set_tasks_checker(mt_svc);
return;
}
diff --git a/src/monitor/monitor_iface.xml b/src/monitor/monitor_iface.xml
index 3d0e67f71..5d6b16f32 100644
--- a/src/monitor/monitor_iface.xml
+++ b/src/monitor/monitor_iface.xml
@@ -15,7 +15,7 @@
<interface name="org.freedesktop.sssd.service">
<annotation value="mon_cli_iface" name="org.freedesktop.DBus.GLib.CSymbol"/>
- <method name="ping">
+ <method name="ping"> <!-- deprecated -->
<!-- no arguments, raw handler -->
<annotation name="org.freedesktop.sssd.RawHandler" value="true"/>
</method>