diff options
author | mohit84 <moagrawa@redhat.com> | 2021-02-18 09:40:44 +0530 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-02-18 09:40:44 +0530 |
commit | 61ae58e67567ea4de8f8efc6b70a9b1f8e0f1bea (patch) | |
tree | 456d7786501c549d5208421c832352ca3ff20be4 /xlators | |
parent | 1f8247c54c47bb24b862786c80e5ce865683b8ec (diff) | |
download | glusterfs-61ae58e67567ea4de8f8efc6b70a9b1f8e0f1bea.tar.gz glusterfs-61ae58e67567ea4de8f8efc6b70a9b1f8e0f1bea.tar.xz glusterfs-61ae58e67567ea4de8f8efc6b70a9b1f8e0f1bea.zip |
glusterd: Rebalance cli is not showing correct status after reboot (#2172)
Rebalance cli is not showing correct status after reboot.
The CLI does not show the correct status because the defrag object is not
valid at the time the rpc connection used to show the status is created.
The defrag object is not valid because, when glusterd starts,
glusterd_restart_rebalance can be called at almost the same time by two
different synctasks; glusterd then gets a disconnect on the rpc object and
cleans up the defrag object.
Solution: To avoid premature cleanup of the defrag object, take a reference
on it before creating the defrag rpc object.
Fixes: #1339
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
Change-Id: Ia284015d79beaa3d703ebabb92f26870a5aaafba
Diffstat (limited to 'xlators')
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-rebalance.c | 35 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-syncop.c | 1 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 59 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.h | 6 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 1 |
5 files changed, 91 insertions, 11 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index d5551ab933..def5420f85 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -101,6 +101,7 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata, glusterd_conf_t *priv = NULL; xlator_t *this = THIS; int pid = -1; + int refcnt = 0; priv = this->private; if (!priv) @@ -136,11 +137,12 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata, } case RPC_CLNT_DISCONNECT: { - if (!defrag->connected) - return 0; - LOCK(&defrag->lock); { + if (!defrag->connected) { + UNLOCK(&defrag->lock); + return 0; + } defrag->connected = 0; } UNLOCK(&defrag->lock); @@ -157,11 +159,11 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata, glusterd_defrag_rpc_put(defrag); if (defrag->cbk_fn) defrag->cbk_fn(volinfo, volinfo->rebal.defrag_status); - - GF_FREE(defrag); + refcnt = glusterd_defrag_unref(defrag); gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_REBALANCE_DISCONNECTED, - "Rebalance process for volume %s has disconnected.", - volinfo->volname); + "Rebalance process for volume %s has disconnected" + " and defrag refcnt is %d.", + volinfo->volname, refcnt); break; } case RPC_CLNT_DESTROY: @@ -323,7 +325,11 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr, gf_msg_debug("glusterd", 0, "rebalance command failed"); goto out; } - + /* Take reference before sleep to save defrag object cleanup while + glusterd_restart_rebalance call for other bricks by syncktask + at the time of restart a glusterd. 
+ */ + glusterd_defrag_ref(defrag); sleep(5); ret = glusterd_rebalance_rpc_create(volinfo); @@ -379,6 +385,7 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo) }; int ret = -1; glusterd_defrag_info_t *defrag = volinfo->rebal.defrag; + struct rpc_clnt *rpc = NULL; // rebalance process is not started if (!defrag) @@ -405,13 +412,21 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo) } glusterd_volinfo_ref(volinfo); - ret = glusterd_rpc_create(&defrag->rpc, options, glusterd_defrag_notify, - volinfo, _gf_true); + ret = glusterd_rpc_create(&rpc, options, glusterd_defrag_notify, volinfo, + _gf_false); if (ret) { gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_RPC_CREATE_FAIL, "Glusterd RPC creation failed"); goto out; } + LOCK(&defrag->lock); + { + if (!defrag->rpc) + defrag->rpc = rpc; + else + rpc_clnt_unref(rpc); + } + UNLOCK(&defrag->lock); ret = 0; out: if (options) diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c index f84b9de133..e9f8187a19 100644 --- a/xlators/mgmt/glusterd/src/glusterd-syncop.c +++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c @@ -1720,6 +1720,7 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, if (!rpc) { if (pending_node->type == GD_NODE_REBALANCE && pending_node->node) { volinfo = pending_node->node; + glusterd_defrag_ref(volinfo->rebal.defrag); ret = glusterd_rebalance_rpc_create(volinfo); if (ret) { ret = 0; diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index eceb19a632..71d546e904 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -120,6 +120,44 @@ } \ } while (0) +int +glusterd_defrag_ref(glusterd_defrag_info_t *defrag) +{ + int refcnt = 0; + + if (!defrag) + goto out; + + LOCK(&defrag->lock); + { + refcnt = ++defrag->refcnt; + } + UNLOCK(&defrag->lock); + +out: + return refcnt; +} + +int +glusterd_defrag_unref(glusterd_defrag_info_t 
*defrag) +{ + int refcnt = -1; + + if (!defrag) + goto out; + + LOCK(&defrag->lock); + { + refcnt = --defrag->refcnt; + if (refcnt <= 0) + GF_FREE(defrag); + } + UNLOCK(&defrag->lock); + +out: + return refcnt; +} + gf_boolean_t is_brick_mx_enabled(void) { @@ -9497,6 +9535,7 @@ glusterd_volume_defrag_restart(glusterd_volinfo_t *volinfo, char *op_errstr, char pidfile[PATH_MAX] = ""; int ret = -1; pid_t pid = 0; + int refcnt = 0; priv = this->private; if (!priv) @@ -9528,7 +9567,25 @@ glusterd_volume_defrag_restart(glusterd_volinfo_t *volinfo, char *op_errstr, volinfo->volname); goto out; } - ret = glusterd_rebalance_rpc_create(volinfo); + refcnt = glusterd_defrag_ref(volinfo->rebal.defrag); + /* If refcnt value is 1 it means either defrag object is + poulated by glusterd_rebalance_defrag_init or previous + rpc creation was failed.If it is not 1 it means it(defrag) + was populated at the time of start a rebalance daemon. + We need to create a rpc object only while a previous + rpc connection was not established successfully at the + time of restart a rebalance daemon by + glusterd_handle_defrag_start otherwise rebalance cli + does not show correct status after just reboot a node and try + to print the rebalance status because defrag object has been + destroyed during handling of rpc disconnect. 
+ */ + if (refcnt == 1) { + ret = glusterd_rebalance_rpc_create(volinfo); + } else { + ret = 0; + glusterd_defrag_unref(volinfo->rebal.defrag); + } break; } case GF_DEFRAG_STATUS_NOT_STARTED: diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index c75f30b22d..7b4e0b95b8 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -868,4 +868,10 @@ glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type, int32_t sub_count, int flag); gf_boolean_t glusterd_gf_is_local_addr(char *hostname); + +int +glusterd_defrag_ref(glusterd_defrag_info_t *defrag); + +int +glusterd_defrag_unref(glusterd_defrag_info_t *defrag); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index fca4ea4ca7..c82804e899 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -333,6 +333,7 @@ struct glusterd_defrag_info_ { uint64_t total_data; uint64_t num_files_lookedup; uint64_t total_failures; + int refcnt; gf_lock_t lock; int cmd; uint32_t connected; |