From 5f3ff9b33c286b30f2c6d2fadf53757b9f48859e Mon Sep 17 00:00:00 2001 From: Gaurav Yadav Date: Fri, 18 Aug 2017 23:06:13 +0530 Subject: glusterd: replace-brick executing successfully when quorum does not met Problem: The replace-brick command executes successfully on a setup where quorum is not met. Fix: With this fix, glusterd validates whether the server is in quorum during replace-brick staging. Change-Id: I8017154bb62bdcc6c6490e720ecfe9cde090c161 BUG: 1483058 Signed-off-by: Gaurav Yadav Reviewed-on: https://review.gluster.org/18068 Smoke: Gluster Build System CentOS-regression: Gluster Build System Reviewed-by: Atin Mukherjee --- .../bug-1483058-replace-brick-quorum-validation.t | 51 ++++++++++++++++++++++ xlators/mgmt/glusterd/src/glusterd-replace-brick.c | 9 ++++ 2 files changed, 60 insertions(+) create mode 100644 tests/bugs/glusterd/bug-1483058-replace-brick-quorum-validation.t diff --git a/tests/bugs/glusterd/bug-1483058-replace-brick-quorum-validation.t b/tests/bugs/glusterd/bug-1483058-replace-brick-quorum-validation.t new file mode 100644 index 0000000000..28b2fbbb9c --- /dev/null +++ b/tests/bugs/glusterd/bug-1483058-replace-brick-quorum-validation.t @@ -0,0 +1,51 @@ +#!/bin/bash + +# Test case for quorum validation in glusterd for syncop framework + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../cluster.rc + + +cleanup; + +TEST launch_cluster 3 + +TEST $CLI_1 peer probe $H2; +EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count + +TEST $CLI_1 peer probe $H3; +EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count + +# Let's create the volume and set the quorum type to server +TEST $CLI_1 volume create $V0 $H1:$B1/${V0}0 $H2:$B2/${V0}1 $H3:$B3/${V0}2 +TEST $CLI_1 volume set $V0 cluster.server-quorum-type server + +# Start the volume +TEST $CLI_1 volume start $V0 + +# Set the quorum ratio to 95, which means that 95% or more of the total nodes +# must be available in order to perform a volume operation. +# i.e. 
Server-side quorum is met if the number of nodes that are available is +# greater than or equal to 'quorum-ratio' times the number of nodes in the +# cluster + +TEST $CLI_1 volume set all cluster.server-quorum-ratio 95 +# Bring down 2nd glusterd +TEST kill_glusterd 2 + +EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count + +# Now quorum is not met. Now execute the replace-brick command +# This command should fail as the cluster is not in quorum +TEST ! $CLI_1 volume replace-brick $V0 $H2:$B2/${V0}1 $H1:$B1/${V0}1_new commit force + +# Bring 2nd glusterd up +TEST start_glusterd 2 + +EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count + +# Now quorum is met. replace-brick will execute successfully +TEST $CLI_1 volume replace-brick $V0 $H2:$B2/${V0}1 $H1:$B1/${V0}1_new commit force + +#cleanup; diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c index 887c349647..ab38725ffb 100644 --- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c +++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c @@ -21,6 +21,7 @@ #include "glusterd-nfs-svc.h" #include "glusterd-volgen.h" #include "glusterd-messages.h" +#include "glusterd-server-quorum.h" #include "glusterd-mgmt.h" #include "run.h" #include "syscall.h" @@ -210,6 +211,14 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, if (ret) goto out; + ret = glusterd_validate_quorum (this, gd_op, dict, op_errstr); + if (ret) { + gf_msg (this->name, GF_LOG_CRITICAL, 0, + GD_MSG_SERVER_QUORUM_NOT_MET, + "Server quorum not met. Rejecting operation."); + goto out; + } + if (strcmp (op, "GF_REPLACE_OP_COMMIT_FORCE")) { ret = -1; goto out; -- cgit