diff options
| author | Kotresh H R <khiremat@redhat.com> | 2014-05-25 23:41:48 +0530 |
|---|---|---|
| committer | Venky Shankar <vshankar@redhat.com> | 2014-06-05 22:58:26 -0700 |
| commit | 77498fdbbca8554880eae4b8f559b9d6876e35b7 (patch) | |
| tree | 29662e2b91634c1abd1c107d5aeeb1d2a4434698 | |
| parent | 535003ca20a9dd00a09dd34ad26947d888aabe39 (diff) | |
feature/geo-rep: Fix to retain pause state of gsyncd on restart.
A new gsyncd option, '--pause-on-start', is introduced. When a node
reboots and the status is paused, gsyncd is started with this
option. After gsyncd spawns the worker and agent, the worker sends
SIGSTOP to the negative pid of the monitor (i.e. to the monitor's process group) to re-enter pause mode.
Change-Id: I5aad82c9a9fc8c243f384940b77d25e26e520d6d
BUG: 1101410
Signed-off-by: Kotresh H R <khiremat@redhat.com>
Reviewed-on: http://review.gluster.org/7885
Reviewed-by: Aravinda VK <avishwan@redhat.com>
Reviewed-by: Venky Shankar <vshankar@redhat.com>
Tested-by: Venky Shankar <vshankar@redhat.com>
| -rw-r--r-- | geo-replication/syncdaemon/gsyncd.py | 1 | ||||
| -rw-r--r-- | geo-replication/syncdaemon/monitor.py | 9 | ||||
| -rw-r--r-- | geo-replication/syncdaemon/resource.py | 8 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-geo-rep.c | 5 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 19 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.h | 2 |
6 files changed, 33 insertions, 11 deletions
diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py index 7d463ad23f..7ddd51267a 100644 --- a/geo-replication/syncdaemon/gsyncd.py +++ b/geo-replication/syncdaemon/gsyncd.py @@ -227,6 +227,7 @@ def main_i(): op.add_option('--ignore-deletes', default=False, action='store_true') op.add_option('--isolated-slave', default=False, action='store_true') op.add_option('--use-rsync-xattrs', default=False, action='store_true') + op.add_option('--pause-on-start', default=False, action='store_true') op.add_option('-L', '--log-level', metavar='LVL') op.add_option('-r', '--remote-gsyncd', metavar='CMD', default=os.path.abspath(sys.argv[0])) diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py index f485fe1860..f3700c1a39 100644 --- a/geo-replication/syncdaemon/monitor.py +++ b/geo-replication/syncdaemon/monitor.py @@ -73,10 +73,11 @@ class Monitor(object): """class which spawns and manages gsyncd workers""" ST_INIT = 'Initializing...' 
+ ST_INIT_PAUSE = 'Initializing...(Paused)' ST_STABLE = 'Stable' ST_FAULTY = 'faulty' ST_INCON = 'inconsistent' - _ST_ORD = [ST_STABLE, ST_INIT, ST_FAULTY, ST_INCON] + _ST_ORD = [ST_STABLE, ST_INIT, ST_INIT_PAUSE, ST_FAULTY, ST_INCON] def __init__(self): self.lock = Lock() @@ -128,7 +129,11 @@ class Monitor(object): due to the keep-alive thread) """ - self.set_state(self.ST_INIT, w) + if gconf.pause_on_start: + self.set_state(self.ST_INIT_PAUSE, w) + else: + self.set_state(self.ST_INIT, w) + ret = 0 def nwait(p, o=0): diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py index dadfc96533..8192a54b0d 100644 --- a/geo-replication/syncdaemon/resource.py +++ b/geo-replication/syncdaemon/resource.py @@ -13,6 +13,7 @@ import os import sys import stat import time +import signal import fcntl import errno import types @@ -1290,6 +1291,13 @@ class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote): logging.debug("Changelog register failed: %s - %s" % (e.errno, e.strerror)) + # Check if gsyncd restarted in pause state. If + # yes, send SIGSTOP to negative of monitor pid + # to go back to pause state. 
+ if gconf.pause_on_start: + os.kill(-os.getppid(), signal.SIGSTOP) + gconf.pause_on_start = False + # oneshot: Try to use changelog history api, if not # available switch to FS crawl # Note: if config.change_detector is xsync then diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c index 3e2e308ec1..aa3cc99fbf 100644 --- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c +++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c @@ -3724,7 +3724,7 @@ glusterd_check_restart_gsync_session (glusterd_volinfo_t *volinfo, char *slave, if (ret == 0) ret = glusterd_start_gsync (volinfo, slave, path_list, conf_path, uuid_utoa(MY_UUID), - NULL); + NULL, _gf_false); out: gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); @@ -4499,7 +4499,8 @@ glusterd_op_gsync_set (dict_t *dict, char **op_errstr, dict_t *rsp_dict) } ret = glusterd_start_gsync (volinfo, slave, path_list, - conf_path, host_uuid, op_errstr); + conf_path, host_uuid, op_errstr, + _gf_false); } if (type == GF_GSYNC_OPTION_TYPE_STOP || diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index a2a746d247..15e91ad24d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -6572,6 +6572,7 @@ _local_gsyncd_start (dict_t *this, char *key, data_t *value, void *data) char *op_errstr = NULL; glusterd_conf_t *priv = NULL; gf_boolean_t is_template_in_use = _gf_false; + gf_boolean_t is_paused = _gf_false; GF_ASSERT (THIS); priv = THIS->private; @@ -6665,9 +6666,9 @@ _local_gsyncd_start (dict_t *this, char *key, data_t *value, void *data) "%s and %s::%s. Not Restarting", volinfo->volname, slave_ip, slave_vol); goto out; - } - - if ((!strcmp (buf, "Config Corrupted"))) { + } else if (strstr(buf, "Paused")) { + is_paused = _gf_true; + } else if ((!strcmp (buf, "Config Corrupted"))) { gf_log ("", GF_LOG_INFO, "Recovering from a corrupted config. " "Not Restarting. 
Use start (force) to " @@ -6677,8 +6678,12 @@ _local_gsyncd_start (dict_t *this, char *key, data_t *value, void *data) goto out; } - glusterd_start_gsync (volinfo, slave, path_list, confpath, - uuid_str, NULL); + if (is_paused) + glusterd_start_gsync (volinfo, slave, path_list, confpath, + uuid_str, NULL, _gf_true); + else + glusterd_start_gsync (volinfo, slave, path_list, confpath, + uuid_str, NULL, _gf_false); out: if (statefile) @@ -8294,7 +8299,7 @@ int glusterd_start_gsync (glusterd_volinfo_t *master_vol, char *slave, char *path_list, char *conf_path, char *glusterd_uuid_str, - char **op_errstr) + char **op_errstr, gf_boolean_t is_pause) { int32_t ret = 0; int32_t status = 0; @@ -8356,6 +8361,8 @@ glusterd_start_gsync (glusterd_volinfo_t *master_vol, char *slave, runner_argprintf (&runner, "--glusterd-uuid=%s", uuid_utoa (priv->uuid)); runner_add_arg (&runner, slave); + if (is_pause) + runner_add_arg (&runner, "--pause-on-start"); synclock_unlock (&priv->big_lock); ret = runner_run (&runner); synclock_lock (&priv->big_lock); diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 4b6e2b0cdb..834d4a5215 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -413,7 +413,7 @@ int glusterd_start_gsync (glusterd_volinfo_t *master_vol, char *slave, char *path_list, char *conf_path, char *glusterd_uuid_str, - char **op_errstr); + char **op_errstr, gf_boolean_t is_pause); int glusterd_get_local_brickpaths (glusterd_volinfo_t *volinfo, char **pathlist); |
