glusterd: compare friend data within mutex

During the friend handshake, if glusterd receives more than one friend update, two threads may end up working on two different volinfo references, and glusterd may end up updating the store with an old volinfo reference. This was observed while debugging a glusterd crash in the validating-server-quorum.t test file from the line-coverage regression run. The solution is to run glusterd_compare_friend_data under a mutex. Test: As the crash was more visible in the line-coverage run (given that lcov adds instrumentation and exposes the races), 6 manual lcov runs were triggered, from https://build.gluster.org/job/line-coverage/443 to https://build.gluster.org/job/line-coverage/449/, and no crash was observed from validating-server-quorum.t. Change-Id: I86fce473a76fd24742d51bf17a685d28b90a8941 Fixes: bz#1603063 Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
author: Atin Mukherjee <amukherj@redhat.com> 2018-08-10 09:12:05 +0530
committer: Atin Mukherjee <amukherj@redhat.com> 2018-08-13 03:01:42 +0000
commit: 29d5557854703f61a4aa1fc53d6b49de9a99fe9d (patch)
tree: bd9399bd9420ea52ad991e7a2ac4a7d482ba33a6
parent: 48b93c292c0069da9ac2fe77e66d08a1cdeacfdc (diff)
download: glusterfs-29d5557854703f61a4aa1fc53d6b49de9a99fe9d.tar.gz
glusterfs-29d5557854703f61a4aa1fc53d6b49de9a99fe9d.tar.xz
glusterfs-29d5557854703f61a4aa1fc53d6b49de9a99fe9d.zip
3 files changed, 48 insertions, 41 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c
index a952a4c179..cbd1bb0aae 100644
--- a/xlators/mgmt/glusterd/src/glusterd-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-sm.c
@@ -937,54 +937,59 @@ glusterd_ac_handle_friend_add_req (glusterd_friend_sm_event_t *event, void *ctx)
          */
 
         //Build comparison logic here.
-        ret = glusterd_compare_friend_data (ev_ctx->vols, &status,
-                                            event->peername);
-        if (ret)
-                goto out;
-
-        if (GLUSTERD_VOL_COMP_RJT != status) {
-                event_type = GD_FRIEND_EVENT_LOCAL_ACC;
-                op_ret = 0;
-        } else {
-                event_type = GD_FRIEND_EVENT_LOCAL_RJT;
-                op_errno = GF_PROBE_VOLUME_CONFLICT;
-                op_ret = -1;
-        }
-
-        /* Compare missed_snapshot list with the peer *
-         * if volume comparison is successful */
-        if ((op_ret == 0) &&
-            (conf->op_version >= GD_OP_VERSION_3_6_0)) {
-                ret = glusterd_import_friend_missed_snap_list (ev_ctx->vols);
+        pthread_mutex_lock (&conf->import_volumes);
+        {
+                ret = glusterd_compare_friend_data (ev_ctx->vols, &status,
+                                                    event->peername);
                 if (ret) {
-                        gf_msg (this->name, GF_LOG_ERROR, 0,
-                                GD_MSG_MISSED_SNAP_LIST_STORE_FAIL,
-                                "Failed to import peer's "
-                                "missed_snaps_list.");
-                        event_type = GD_FRIEND_EVENT_LOCAL_RJT;
-                        op_errno = GF_PROBE_MISSED_SNAP_CONFLICT;
-                        op_ret = -1;
+                        pthread_mutex_unlock (&conf->import_volumes);
+                        goto out;
                 }
 
-                /* glusterd_compare_friend_snapshots and functions only require
-                 * a peers hostname and uuid. It also does updates, which
-                 * require use of synchronize_rcu. So we pass the hostname and
-                 * id from the event instead of the peerinfo object to prevent
-                 * deadlocks as above.
-                 */
-                ret = glusterd_compare_friend_snapshots (ev_ctx->vols,
-                                                         event->peername,
-                                                         event->peerid);
-                if (ret) {
-                        gf_msg (this->name, GF_LOG_ERROR, 0,
-                                GD_MSG_SNAP_COMPARE_CONFLICT,
-                                "Conflict in comparing peer's snapshots");
+                if (GLUSTERD_VOL_COMP_RJT != status) {
+                        event_type = GD_FRIEND_EVENT_LOCAL_ACC;
+                        op_ret = 0;
+                } else {
                         event_type = GD_FRIEND_EVENT_LOCAL_RJT;
-                        op_errno = GF_PROBE_SNAP_CONFLICT;
+                        op_errno = GF_PROBE_VOLUME_CONFLICT;
                         op_ret = -1;
                 }
-        }
 
+                /* Compare missed_snapshot list with the peer *
+                 * if volume comparison is successful */
+                if ((op_ret == 0) &&
+                    (conf->op_version >= GD_OP_VERSION_3_6_0)) {
+                        ret = glusterd_import_friend_missed_snap_list (ev_ctx->vols);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_MISSED_SNAP_LIST_STORE_FAIL,
+                                        "Failed to import peer's "
+                                        "missed_snaps_list.");
+                                event_type = GD_FRIEND_EVENT_LOCAL_RJT;
+                                op_errno = GF_PROBE_MISSED_SNAP_CONFLICT;
+                                op_ret = -1;
+                        }
+
+                        /* glusterd_compare_friend_snapshots and functions only require
+                         * a peers hostname and uuid. It also does updates, which
+                         * require use of synchronize_rcu. So we pass the hostname and
+                         * id from the event instead of the peerinfo object to prevent
+                         * deadlocks as above.
+                         */
+                        ret = glusterd_compare_friend_snapshots (ev_ctx->vols,
+                                                                 event->peername,
+                                                                 event->peerid);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_SNAP_COMPARE_CONFLICT,
+                                        "Conflict in comparing peer's snapshots");
+                                event_type = GD_FRIEND_EVENT_LOCAL_RJT;
+                                op_errno = GF_PROBE_SNAP_CONFLICT;
+                                op_ret = -1;
+                        }
+                }
+        }
+        pthread_mutex_unlock (&conf->import_volumes);
         ret = glusterd_friend_sm_new_event (event_type, &new_event);
 
         if (ret) {
diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c
index 0714714d33..f72953cd31 100644
--- a/xlators/mgmt/glusterd/src/glusterd.c
+++ b/xlators/mgmt/glusterd/src/glusterd.c
@@ -1854,6 +1854,7 @@ init (xlator_t *this)
         synclock_init (&conf->big_lock, SYNC_LOCK_RECURSIVE);
         pthread_mutex_init (&conf->xprt_lock, NULL);
         INIT_LIST_HEAD (&conf->xprt_list);
+        pthread_mutex_init (&conf->import_volumes, NULL);
 
         glusterd_friend_sm_init ();
         glusterd_op_sm_init ();
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index 66b7fbb229..39232d2d5e 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -162,6 +162,7 @@ typedef struct {
         struct cds_list_head     brick_procs; /* List of brick processes */
         pthread_mutex_t          xprt_lock;
         struct list_head         xprt_list;
+        pthread_mutex_t          import_volumes;
         gf_store_handle_t       *handle;
         gf_timer_t              *timer;
         glusterd_sm_tr_log_t     op_sm_log;
author	Atin Mukherjee <amukherj@redhat.com>	2018-08-10 09:12:05 +0530
committer	Atin Mukherjee <amukherj@redhat.com>	2018-08-13 03:01:42 +0000
commit	29d5557854703f61a4aa1fc53d6b49de9a99fe9d (patch)
tree	bd9399bd9420ea52ad991e7a2ac4a7d482ba33a6
parent	48b93c292c0069da9ac2fe77e66d08a1cdeacfdc (diff)
download	glusterfs-29d5557854703f61a4aa1fc53d6b49de9a99fe9d.tar.gz glusterfs-29d5557854703f61a4aa1fc53d6b49de9a99fe9d.tar.xz glusterfs-29d5557854703f61a4aa1fc53d6b49de9a99fe9d.zip