From 828ebf6608555b7e36fcfc8b899b803c0913fb43 Mon Sep 17 00:00:00 2001 From: William Brown Date: Sep 10 2019 23:31:32 +0000 Subject: Ticket 50584, 49212 - docker healthcheck and configuration Bug Description: Docker is managed by providing values from the environment. To know if this is ready to make changes to our local instance, we need to be able to check the instance is healthy. In addition, docker has a health check process which can allow monitoring and management of instances as they start. Fix Description: This provides a healthcheck tool and allows configuration by the environment for the directory manager password, and allows indicating via the env to perform a db2index on startup. https://pagure.io/389-ds-base/issue/49212 https://pagure.io/389-ds-base/issue/50584 Author: William Brown Review by: ??? --- diff --git a/docker/389-ds-suse/Dockerfile b/docker/389-ds-suse/Dockerfile index 9db8b2d..1e56e1f 100644 --- a/docker/389-ds-suse/Dockerfile +++ b/docker/389-ds-suse/Dockerfile @@ -75,4 +75,8 @@ VOLUME /data # Set the userup correctly. # USER dirsrv +HEALTHCHECK --start-period=5m --timeout=5s --interval=5s --retries=2 \ + CMD /usr/sbin/dscontainer -H + CMD [ "/usr/sbin/dscontainer", "-r" ] + diff --git a/src/lib389/cli/dscontainer b/src/lib389/cli/dscontainer index 7503b82..7bea9ba 100755 --- a/src/lib389/cli/dscontainer +++ b/src/lib389/cli/dscontainer @@ -27,6 +27,7 @@ import grp import pwd import atexit import os +import time import signal import sys import subprocess @@ -42,6 +43,8 @@ from lib389.passwd import password_generate from lib389.paths import Paths from lib389._constants import DSRC_CONTAINER +from lib389.idm.directorymanager import DirectoryManager + # We setup the logger in verbose mode to make sure debug info # is always available!
log = setup_script_logger("container-init", True) @@ -52,9 +55,34 @@ log = setup_script_logger("container-init", True) # We take *args and **kwargs here to handle the fact that this signal gets args, but # we don't need or care about them. def _sigchild_handler(*args, **kwargs): - log.debug("Received SIGCHLD ...") + # log.debug("Received SIGCHLD ...") os.waitpid(-1, os.WNOHANG) +def _gen_instance(): + inst = DirSrv(verbose=True) + inst.local_simple_allocate("localhost") + inst.setup_ldapi() + return inst + +def _begin_environment_config(): + inst = _gen_instance() + inst.open() + # TODO: Should we reset cn=Directory Manager from env? + dm_pass = os.getenv("DS_DM_PASSWORD", None) + if dm_pass is not None: + dm = DirectoryManager(inst) + dm.change_password(dm_pass) + # TODO: Should we set replica id from env? + # TODO: Should we set replication agreements from env? + + inst.close() + +def _begin_check_reindex(): + if os.getenv('DS_REINDEX', None) is not None: + log.info("Reindexing database. This may take a while ...") + inst = _gen_instance() + inst.db2index() + def begin_magic(): log.info("The 389 Directory Server Container Bootstrap") # Leave this comment here: UofA let me take this code with me provided @@ -182,29 +210,26 @@ binddn = cn=Directory Manager """) os.chmod(DSRC_CONTAINER, 0o755) - # TODO: All of this is contingent on the server starting *and* - # ldapi working ... Perhaps these are better inside ns-slapd core - # and we just proxy/filter the env through? - # TODO: Should we reset cn=Directory Manager from env? - # TODO: Should we set replica id from env? - # TODO: Should we set replication agreements from env? - # TODO: Should we allow re-indexing at startup from env? + # If we have been requested to re-index, do so now ... + _begin_check_reindex() # Yep! Run it ... # Now unlike a normal lib389 start, we use subprocess and don't fork! # TODO: Should we pass in a loglevel from env? 
log.info("Starting 389-ds-container ...") + # We can't use the instance "start" because we need the pid handler so we can do + # a wait/block on it. That's why we do the Popen here direct. global ds_proc ds_proc = subprocess.Popen([ "%s/ns-slapd" % paths.sbin_dir, "-D", paths.config_dir, + "-i", "/data/run/slapd-localhost.pid", # See /ldap/servers/slapd/slap.h SLAPD_DEFAULT_ERRORLOG_LEVEL "-d", "266354688", ], stdout=subprocess.PIPE, stderr=subprocess.PIPE) - # To make sure we really do shutdown, we actually re-block on the proc - # again here to be sure it's done. + # Setup the process and shutdown handler in an init-esque fashion. def kill_ds(): if ds_proc is None: pass @@ -215,17 +240,53 @@ binddn = cn=Directory Manager # It's already gone ... pass log.info("STOPPING: Shutting down 389-ds-container ...") + # To make sure we really do shutdown, we actually re-block on the proc + # again here to be sure it's done. ds_proc.wait() atexit.register(kill_ds) - # Now wait ... + # Wait on the health check to show we are ready for ldapi. + failure_count = 0 + max_failure_count = 5 + for i in range(0, max_failure_count): + status = begin_healthcheck() + if status is True: + break + failure_count += 1 + time.sleep(3) + if failure_count == max_failure_count: + log.error("389-ds-container failed to start") + sys.exit(1) + + # Now via ldapi, set some values. + log.info("Applying environment configuration (if present) ...") + _begin_environment_config() + + log.info("389-ds-container started.") + + # Now block until we get shutdown! If we are signaled to exit, this + # will trigger the atexit handler from above. try: ds_proc.wait() except KeyboardInterrupt: pass # THE LETTER OF THE DAY IS C AND THE NUMBER IS 10 + +def begin_healthcheck(): + # Is there an ns-slapd pid? + # Can we get ldapi response? + inst = _gen_instance() + if inst.status() is not True: + return False + # Now do an ldapi check, make sure we are dm. 
+ inst.open() + if "dn: cn=Directory Manager" == inst.whoami_s(): + return True + return False + + if __name__ == '__main__': # Before all else, we are INIT so setup sigchild signal.signal(signal.SIGCHLD, _sigchild_handler) @@ -259,10 +320,20 @@ container host. parser.add_argument('-r', '--runit', help="Actually run the instance! You understand what that means ...", action='store_true', default=False, dest='runit') + parser.add_argument('-H', '--healthcheck', + help="Start a healthcheck inside of the container for an instance. You should understand what this means ...", + action='store_true', default=False, dest='healthcheck') + argcomplete.autocomplete(parser) args = parser.parse_args() if args.runit: begin_magic() + elif args.healthcheck: + if begin_healthcheck() is True: + sys.exit(0) + else: + sys.exit(1) + diff --git a/src/lib389/lib389/__init__.py b/src/lib389/lib389/__init__.py index 8e6eb66..fb46dfd 100644 --- a/src/lib389/lib389/__init__.py +++ b/src/lib389/lib389/__init__.py @@ -465,6 +465,11 @@ class DirSrv(SimpleLDAPObject, object): self.state = DIRSRV_STATE_ALLOCATED self.log.debug("Allocate local instance %s with %s", self.__class__, self.ldapuri) + def setup_ldapi(self): + self.ldapi_enabled = "on" + self.ldapi_socket = self.ds_paths.ldapi + self.ldapi_autobind = "on" + def remote_simple_allocate(self, ldapuri, binddn='cn=Directory Manager', password=None): """Allocate an instance, and perform a simple bind. This instance is remote, so local tasks will not operate. @@ -1219,15 +1224,15 @@ class DirSrv(SimpleLDAPObject, object): "dirsrv@%s" % self.serverid]) if rc == 0: return True - # This .... probably will mess something up + # We don't reset the state here because we don't know what state + # we are in re shutdown. The state is for us internally anyway. 
# self.state = DIRSRV_STATE_RUNNING self.state = DIRSRV_STATE_OFFLINE return False else: self.log.debug("systemd status -> False") - # TODO: Make the pid path in the files things - # TODO: use the status call instead!!!! pid = pid_from_file(self.ds_paths.pid_file) + self.log.debug("pid file -> %s" % pid) if pid is None: self.log.debug("No pidfile found for %s", self.serverid) # No pidfile yet ... @@ -1541,7 +1546,7 @@ class DirSrv(SimpleLDAPObject, object): if self.ldapuri: return self.ldapuri elif self.ldapi_enabled == 'on' and self.ldapi_socket is not None: - return "ldapi://%s" % (ldapurl.ldapUrlEscape(ensure_str(ldapi_socket))) + return "ldapi://%s" % (ldapurl.ldapUrlEscape(ensure_str(self.ldapi_socket))) elif self.sslport and not self.realm: # Gssapi can't use SSL so we have to nuke it here. return "ldaps://%s:%d/" % (ensure_str(self.host), self.sslport) diff --git a/src/lib389/lib389/paths.py b/src/lib389/lib389/paths.py index ef0ace9..b1734ed 100644 --- a/src/lib389/lib389/paths.py +++ b/src/lib389/lib389/paths.py @@ -9,7 +9,7 @@ import sys import os -from lib389._constants import DIRSRV_STATE_ONLINE +from lib389._constants import DIRSRV_STATE_ONLINE, DSRC_CONTAINER MAJOR, MINOR, _, _, _ = sys.version_info @@ -108,6 +108,7 @@ class Paths(object): to know about paths, shouldn't need to have a copy of 389-ds-base installed to remotely admin a server. 
""" + self._is_container = os.path.exists(DSRC_CONTAINER) self._defaults_cached = False self._config = None self._serverid = serverid @@ -131,6 +132,10 @@ class Paths(object): spath = self._get_defaults_loc(DEFAULTS_PATH) self._config = configparser.ConfigParser() self._config.read([spath]) + if self._is_container: + # Load some values over the top that are container specific + self._config.set(SECTION, "pid_file", "/data/run/slapd-localhost.pid") + self._config.set(SECTION, "ldapi", "/data/run/slapd-localhost.socket") self._defaults_cached = True def _validate_defaults(self): @@ -175,6 +180,9 @@ class Paths(object): if self._defaults_cached is False: self._read_defaults() self._validate_defaults() + if self._is_container: + # We never have systemd in a container, so check the marker. + return False if self._config.has_option(SECTION, 'with_systemd'): if self._config.get(SECTION, 'with_systemd') == '1': return True