glusterd: Rebalance cli is not showing correct status after reboot (#2172)

The rebalance CLI does not show the correct status after a reboot. The status is wrong because the defrag object is no longer valid at the time the rpc connection used to fetch the status is created. The defrag object becomes invalid because, when glusterd starts, glusterd_restart_rebalance can be called at almost the same time by two different synctasks; glusterd then gets a disconnect on the rpc object and cleans up the defrag object. Solution: To avoid cleaning up the defrag object prematurely, take a reference on it before creating the defrag rpc object. Fixes: #1339 Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> Change-Id: Ia284015d79beaa3d703ebabb92f26870a5aaafba
author: mohit84 <moagrawa@redhat.com> 2021-02-18 09:40:44 +0530
committer: GitHub <noreply@github.com> 2021-02-18 09:40:44 +0530
commit: 61ae58e67567ea4de8f8efc6b70a9b1f8e0f1bea (patch)
tree: 456d7786501c549d5208421c832352ca3ff20be4 /xlators
parent: 1f8247c54c47bb24b862786c80e5ce865683b8ec (diff)
download: glusterfs-61ae58e67567ea4de8f8efc6b70a9b1f8e0f1bea.tar.gz
glusterfs-61ae58e67567ea4de8f8efc6b70a9b1f8e0f1bea.tar.xz
glusterfs-61ae58e67567ea4de8f8efc6b70a9b1f8e0f1bea.zip
5 files changed, 91 insertions, 11 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
index d5551ab933..def5420f85 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
@@ -101,6 +101,7 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata,
     glusterd_conf_t *priv = NULL;
     xlator_t *this = THIS;
     int pid = -1;
+    int refcnt = 0;
 
     priv = this->private;
     if (!priv)
@@ -136,11 +137,12 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata,
         }
 
         case RPC_CLNT_DISCONNECT: {
-            if (!defrag->connected)
-                return 0;
-
             LOCK(&defrag->lock);
             {
+                if (!defrag->connected) {
+                    UNLOCK(&defrag->lock);
+                    return 0;
+                }
                 defrag->connected = 0;
             }
             UNLOCK(&defrag->lock);
@@ -157,11 +159,11 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata,
             glusterd_defrag_rpc_put(defrag);
             if (defrag->cbk_fn)
                 defrag->cbk_fn(volinfo, volinfo->rebal.defrag_status);
-
-            GF_FREE(defrag);
+            refcnt = glusterd_defrag_unref(defrag);
             gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_REBALANCE_DISCONNECTED,
-                   "Rebalance process for volume %s has disconnected.",
-                   volinfo->volname);
+                   "Rebalance process for volume %s has disconnected"
+                   " and defrag refcnt is %d.",
+                   volinfo->volname, refcnt);
             break;
         }
         case RPC_CLNT_DESTROY:
@@ -323,7 +325,11 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr,
         gf_msg_debug("glusterd", 0, "rebalance command failed");
         goto out;
     }
-
+    /* Take a reference before sleeping so that the defrag object is not
+       cleaned up while glusterd_restart_rebalance is called for other
+       bricks by a synctask at the time glusterd restarts.
+    */
+    glusterd_defrag_ref(defrag);
     sleep(5);
 
     ret = glusterd_rebalance_rpc_create(volinfo);
@@ -379,6 +385,7 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo)
     };
     int ret = -1;
     glusterd_defrag_info_t *defrag = volinfo->rebal.defrag;
+    struct rpc_clnt *rpc = NULL;
 
     // rebalance process is not started
     if (!defrag)
@@ -405,13 +412,21 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo)
     }
 
     glusterd_volinfo_ref(volinfo);
-    ret = glusterd_rpc_create(&defrag->rpc, options, glusterd_defrag_notify,
-                              volinfo, _gf_true);
+    ret = glusterd_rpc_create(&rpc, options, glusterd_defrag_notify, volinfo,
+                              _gf_false);
     if (ret) {
         gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_RPC_CREATE_FAIL,
                "Glusterd RPC creation failed");
         goto out;
     }
+    LOCK(&defrag->lock);
+    {
+        if (!defrag->rpc)
+            defrag->rpc = rpc;
+        else
+            rpc_clnt_unref(rpc);
+    }
+    UNLOCK(&defrag->lock);
     ret = 0;
 out:
     if (options)
diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
index f84b9de133..e9f8187a19 100644
--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
@@ -1720,6 +1720,7 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
         if (!rpc) {
             if (pending_node->type == GD_NODE_REBALANCE && pending_node->node) {
                 volinfo = pending_node->node;
+                glusterd_defrag_ref(volinfo->rebal.defrag);
                 ret = glusterd_rebalance_rpc_create(volinfo);
                 if (ret) {
                     ret = 0;
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index eceb19a632..71d546e904 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -120,6 +120,44 @@
         }                                                                      \
     } while (0)
 
+/* Take one reference on a defrag object, under defrag->lock.
+ * Returns the updated reference count, or 0 when defrag is NULL.
+ */
+int
+glusterd_defrag_ref(glusterd_defrag_info_t *defrag)
+{
+    int count = 0;
+
+    if (defrag != NULL) {
+        LOCK(&defrag->lock);
+        defrag->refcnt += 1;
+        count = defrag->refcnt;
+        UNLOCK(&defrag->lock);
+    }
+
+    return count;
+}
+
+/* Drop a reference; frees defrag at <= 0. Returns new count, -1 on NULL. */
+int
+glusterd_defrag_unref(glusterd_defrag_info_t *defrag)
+{
+    int refcnt = -1;
+
+    if (!defrag)
+        return refcnt;
+
+    LOCK(&defrag->lock);
+    refcnt = --defrag->refcnt;
+    UNLOCK(&defrag->lock);
+
+    /* Free only after UNLOCK: the lock lives inside the freed object. */
+    if (refcnt <= 0)
+        GF_FREE(defrag);
+
+    return refcnt;
+}
+
 gf_boolean_t
 is_brick_mx_enabled(void)
 {
@@ -9497,6 +9535,7 @@ glusterd_volume_defrag_restart(glusterd_volinfo_t *volinfo, char *op_errstr,
     char pidfile[PATH_MAX] = "";
     int ret = -1;
     pid_t pid = 0;
+    int refcnt = 0;
 
     priv = this->private;
     if (!priv)
@@ -9528,7 +9567,25 @@ glusterd_volume_defrag_restart(glusterd_volinfo_t *volinfo, char *op_errstr,
                              volinfo->volname);
                     goto out;
                 }
-                ret = glusterd_rebalance_rpc_create(volinfo);
+                refcnt = glusterd_defrag_ref(volinfo->rebal.defrag);
+                /* If refcnt is 1, either the defrag object was populated
+                   by glusterd_rebalance_defrag_init or the previous rpc
+                   creation failed. If it is not 1, the defrag object was
+                   populated when the rebalance daemon was started.
+                   An rpc object needs to be created only when a previous
+                   rpc connection was not established successfully at the
+                   time the rebalance daemon was restarted by
+                   glusterd_handle_defrag_start. Otherwise the rebalance
+                   cli does not show the correct status when it is run
+                   just after a node reboot, because the defrag object is
+                   destroyed while the rpc disconnect is handled.
+                */
+                if (refcnt == 1) {
+                    ret = glusterd_rebalance_rpc_create(volinfo);
+                } else {
+                    ret = 0;
+                    glusterd_defrag_unref(volinfo->rebal.defrag);
+                }
                 break;
             }
         case GF_DEFRAG_STATUS_NOT_STARTED:
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index c75f30b22d..7b4e0b95b8 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -868,4 +868,10 @@ glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type,
                            int32_t sub_count, int flag);
 gf_boolean_t
 glusterd_gf_is_local_addr(char *hostname);
+
+int
+glusterd_defrag_ref(glusterd_defrag_info_t *defrag);
+
+int
+glusterd_defrag_unref(glusterd_defrag_info_t *defrag);
 #endif
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index fca4ea4ca7..c82804e899 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -333,6 +333,7 @@ struct glusterd_defrag_info_ {
     uint64_t total_data;
     uint64_t num_files_lookedup;
     uint64_t total_failures;
+    int refcnt;
     gf_lock_t lock;
     int cmd;
     uint32_t connected;
author	mohit84 <moagrawa@redhat.com>	2021-02-18 09:40:44 +0530
committer	GitHub <noreply@github.com>	2021-02-18 09:40:44 +0530
commit	61ae58e67567ea4de8f8efc6b70a9b1f8e0f1bea (patch)
tree	456d7786501c549d5208421c832352ca3ff20be4 /xlators
parent	1f8247c54c47bb24b862786c80e5ce865683b8ec (diff)
download	glusterfs-61ae58e67567ea4de8f8efc6b70a9b1f8e0f1bea.tar.gz glusterfs-61ae58e67567ea4de8f8efc6b70a9b1f8e0f1bea.tar.xz glusterfs-61ae58e67567ea4de8f8efc6b70a9b1f8e0f1bea.zip