summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Pranith Kumar Karampuri <pranith.karampuri@phonepe.com> 2021-02-09 08:32:50 +0530
committer: GitHub <noreply@github.com> 2021-02-09 08:32:50 +0530
commit: 6ca27dfba90224e9dc0c025e22599c759b2d671c (patch)
tree: 3bb952dbf29fd229729a9924a6bbbdb8899ae5e0
parent: 2f27e2ca1041cd8a51b62487d6f731d6ccb09d98 (diff)
download: glusterfs-6ca27dfba90224e9dc0c025e22599c759b2d671c.tar.gz
glusterfs-6ca27dfba90224e9dc0c025e22599c759b2d671c.tar.xz
glusterfs-6ca27dfba90224e9dc0c025e22599c759b2d671c.zip
cluster/dht: Perform migrate-file with lk-owner (#2105)
fixes: #1529
Change-Id: I7f233ff41cda400d335f6a70ba09be736770116d
Signed-off-by: Pranith Kumar K <pranith.karampuri@phonepe.com>
-rw-r--r-- tests/basic/distribute/manual-rebalance.t 81
-rw-r--r-- tests/volume.rc 4
-rw-r--r-- xlators/cluster/dht/src/dht-common.c 17
-rw-r--r-- xlators/cluster/dht/src/dht-rebalance.c 22
4 files changed, 104 insertions, 20 deletions
diff --git a/tests/basic/distribute/manual-rebalance.t b/tests/basic/distribute/manual-rebalance.t
new file mode 100644
index 0000000000..299b9f16fa
--- /dev/null
+++ b/tests/basic/distribute/manual-rebalance.t
@@ -0,0 +1,81 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#This test checks if manual rebalance happens similarly to normal rebalance
+
+TESTS_EXPECTED_IN_LOOP=10
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST mkdir $M0/d
+declare -a checksums
+for i in {1..10};
+do
+ TEST_IN_LOOP dd if=/dev/urandom of=$M0/d/$i bs=1M count=1
+ checksums[$i]="$(md5sum $M0/d/$i | awk '{print $1}')"
+done
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}1 force
+TEST $CLI volume rebalance $V0 fix-layout start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "fix-layout completed" fix-layout_status_field $V0
+
+errors=0
+migrations=0
+for i in {1..10};
+do
+ setfattr -n trusted.distribute.migrate-data -v 1 $M0/d/$i 2>/dev/null
+ if [ $? -eq 0 ] #Migration happened for the file
+ then
+ if [ "${checksums[i]}" != "$(md5sum $B0/${V0}1/d/$i | awk '{print $1}')" ]
+ then
+ errors=$((errors+1)) #Data on new brick shouldn't change
+ else
+ migrations=$((migrations+1))
+ fi
+ else #Migration is not applicable
+ if [ "${checksums[i]}" != "$(md5sum $B0/${V0}0/d/$i | awk '{print $1}')" ]
+ then
+ errors=$((errors+1)) #Data on old brick shouldn't change
+ fi
+ fi
+done
+
+EXPECT_NOT "^0$" echo $migrations #At least one file should migrate
+EXPECT "^0$" echo $errors
+
+#Test that rebalance crawl is equivalent to manual rebalance
+TEST $CLI volume rebalance $V0 start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+EXPECT "^0$" rebalanced_files_field $V0
+
+
+#Do one final check that data didn't change after normal rebalance
+success=0
+for i in {1..10}
+do
+ if [ -f $B0/${V0}0/d/$i ]
+ then
+ if [ "${checksums[i]}" == "$(md5sum $B0/${V0}0/d/$i | awk '{print $1}')" ]
+ then
+ success=$((success+1))
+ fi
+ else
+ if [ "${checksums[i]}" == "$(md5sum $B0/${V0}1/d/$i | awk '{print $1}')" ]
+ then
+ success=$((success+1))
+ fi
+ fi
+done
+
+EXPECT "^10$" echo $success
+cleanup
diff --git a/tests/volume.rc b/tests/volume.rc
index bc768c9434..8054c05069 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -75,6 +75,10 @@ function rebalance_status_field {
$CLI volume rebalance $1 status | awk '{print $7}' | sed -n 3p
}
+function rebalanced_files_field {
+ $CLI volume rebalance $1 status | awk '{print $2}' | sed -n 3p
+}
+
function fix-layout_status_field {
#The fix-layout status can be up to 3 words, (ex:'fix-layout in progress'), hence the awk-print $2 thru $4.
#But if the status is less than 3 words, it also prints the next field i.e the run_time_in_secs.(ex:'completed 3.00').
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 23a062aa66..43a21ff78b 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -5883,23 +5883,6 @@ dht_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr,
if (local->rebalance.target_node) {
local->flags = forced_rebalance;
- /* Flag to suggest its a tiering migration
- * The reason for this dic key-value is that
- * promotions and demotions are multithreaded
- * so the original frame from gf_defrag_start()
- * is not carried. A new frame will be created when
- * we do syncop_setxattr(). This does not have the
- * frame->root->pid of the original frame. So we pass
- * this dic key-value when we do syncop_setxattr() to do
- * data migration and set the frame->root->pid to
- * GF_CLIENT_PID_TIER_DEFRAG in dht_setxattr() just before
- * calling dht_start_rebalance_task() */
- tmp = dict_get(xattr, TIERING_MIGRATION_KEY);
- if (tmp)
- frame->root->pid = GF_CLIENT_PID_TIER_DEFRAG;
- else
- frame->root->pid = GF_CLIENT_PID_DEFRAG;
-
ret = dht_start_rebalance_task(this, frame);
if (!ret)
return 0;
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 4f63e76aad..ae88969e70 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -2363,9 +2363,10 @@ rebalance_task(void *data)
}
static int
-rebalance_task_completion(int op_ret, call_frame_t *sync_frame, void *data)
+rebalance_task_completion(int op_ret, call_frame_t *syncop_frame, void *data)
{
int32_t op_errno = EINVAL;
+ call_frame_t *setxattr_frame = data;
if (op_ret == -1) {
/* Failure of migration process, mostly due to write process.
@@ -2385,7 +2386,9 @@ rebalance_task_completion(int op_ret, call_frame_t *sync_frame, void *data)
op_ret = -1;
}
- DHT_STACK_UNWIND(setxattr, sync_frame, op_ret, op_errno, NULL);
+ DHT_STACK_UNWIND(setxattr, setxattr_frame, op_ret, op_errno, NULL);
+ GF_ASSERT(syncop_frame->local == NULL);
+ STACK_DESTROY(syncop_frame->root);
return 0;
}
@@ -2393,9 +2396,22 @@ int
dht_start_rebalance_task(xlator_t *this, call_frame_t *frame)
{
int ret = -1;
+ call_frame_t *syncop_frame = NULL;
+
+ syncop_frame = copy_frame(frame);
+ if (!syncop_frame) {
+ goto out;
+ }
+
+ syncop_frame->root->pid = GF_CLIENT_PID_DEFRAG;
+ set_lk_owner_from_ptr(&syncop_frame->root->lk_owner, syncop_frame->root);
ret = synctask_new(this->ctx->env, rebalance_task,
- rebalance_task_completion, frame, frame);
+ rebalance_task_completion, syncop_frame, frame);
+out:
+ if ((ret < 0) && syncop_frame) {
+ STACK_DESTROY(syncop_frame->root);
+ }
return ret;
}