diff options
author | Pranith Kumar Karampuri <pranith.karampuri@phonepe.com> | 2021-02-09 08:32:50 +0530 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-02-09 08:32:50 +0530 |
commit | 6ca27dfba90224e9dc0c025e22599c759b2d671c (patch) | |
tree | 3bb952dbf29fd229729a9924a6bbbdb8899ae5e0 | |
parent | 2f27e2ca1041cd8a51b62487d6f731d6ccb09d98 (diff) | |
download | glusterfs-6ca27dfba90224e9dc0c025e22599c759b2d671c.tar.gz glusterfs-6ca27dfba90224e9dc0c025e22599c759b2d671c.tar.xz glusterfs-6ca27dfba90224e9dc0c025e22599c759b2d671c.zip |
cluster/dht: Perform migrate-file with lk-owner (#2105)
fixes: #1529
Change-Id: I7f233ff41cda400d335f6a70ba09be736770116d
Signed-off-by: Pranith Kumar K <pranith.karampuri@phonepe.com>
-rw-r--r-- | tests/basic/distribute/manual-rebalance.t | 81 | ||||
-rw-r--r-- | tests/volume.rc | 4 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 17 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 22 |
4 files changed, 104 insertions, 20 deletions
diff --git a/tests/basic/distribute/manual-rebalance.t b/tests/basic/distribute/manual-rebalance.t new file mode 100644 index 0000000000..299b9f16fa --- /dev/null +++ b/tests/basic/distribute/manual-rebalance.t @@ -0,0 +1,81 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +#This test checks if the manual rebalance happens similarly to normal rebalance + +TESTS_EXPECTED_IN_LOOP=10 +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 $H0:$B0/${V0}0 +TEST $CLI volume set $V0 performance.quick-read off +TEST $CLI volume set $V0 performance.io-cache off +TEST $CLI volume set $V0 performance.write-behind off +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume set $V0 performance.read-ahead off +TEST $CLI volume start $V0 +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 +TEST mkdir $M0/d +declare -a checksums +for i in {1..10}; +do + TEST_IN_LOOP dd if=/dev/urandom of=$M0/d/$i bs=1M count=1 + checksums[$i]="$(md5sum $M0/d/$i | awk '{print $1}')" +done +TEST $CLI volume add-brick $V0 $H0:$B0/${V0}1 force +TEST $CLI volume rebalance $V0 fix-layout start +EXPECT_WITHIN $REBALANCE_TIMEOUT "fix-layout completed" fix-layout_status_field $V0 + +errors=0 +migrations=0 +for i in {1..10}; +do + setfattr -n trusted.distribute.migrate-data -v 1 $M0/d/$i 2>/dev/null + if [ $? 
-eq 0 ] #Migration happened for the file + then + if [ "${checksums[i]}" != "$(md5sum $B0/${V0}1/d/$i | awk '{print $1}')" ] + then + errors=$((errors+1)) #Data on new brick shouldn't change + else + migrations=$((migrations+1)) + fi + else #Migration is not applicable + if [ "${checksums[i]}" != "$(md5sum $B0/${V0}0/d/$i | awk '{print $1}')" ] + then + errors=$((errors+1)) #Data on old brick shouldn't change + fi + fi +done + +EXPECT_NOT "^0$" echo $migrations #At least one file should migrate +EXPECT "^0$" echo $errors + +#Test that rebalance crawl is equivalent to manual rebalance +TEST $CLI volume rebalance $V0 start +EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0 +EXPECT "^0$" rebalanced_files_field $V0 + + +#Do one final check that data didn't change after normal rebalance +success=0 +for i in {1..10} +do + if [ -f $B0/${V0}0/d/$i ] + then + if [ "${checksums[i]}" == "$(md5sum $B0/${V0}0/d/$i | awk '{print $1}')" ] + then + success=$((success+1)) + fi + else + if [ "${checksums[i]}" == "$(md5sum $B0/${V0}1/d/$i | awk '{print $1}')" ] + then + success=$((success+1)) + fi + fi +done + +EXPECT "^10$" echo $success +cleanup diff --git a/tests/volume.rc b/tests/volume.rc index bc768c9434..8054c05069 100644 --- a/tests/volume.rc +++ b/tests/volume.rc @@ -75,6 +75,10 @@ function rebalance_status_field { $CLI volume rebalance $1 status | awk '{print $7}' | sed -n 3p } +function rebalanced_files_field { + $CLI volume rebalance $1 status | awk '{print $2}' | sed -n 3p +} + function fix-layout_status_field { #The fix-layout status can be up to 3 words, (ex:'fix-layout in progress'), hence the awk-print $2 thru $4. #But if the status is less than 3 words, it also prints the next field i.e the run_time_in_secs.(ex:'completed 3.00'). 
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 23a062aa66..43a21ff78b 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -5883,23 +5883,6 @@ dht_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr, if (local->rebalance.target_node) { local->flags = forced_rebalance; - /* Flag to suggest its a tiering migration - * The reason for this dic key-value is that - * promotions and demotions are multithreaded - * so the original frame from gf_defrag_start() - * is not carried. A new frame will be created when - * we do syncop_setxattr(). This does not have the - * frame->root->pid of the original frame. So we pass - * this dic key-value when we do syncop_setxattr() to do - * data migration and set the frame->root->pid to - * GF_CLIENT_PID_TIER_DEFRAG in dht_setxattr() just before - * calling dht_start_rebalance_task() */ - tmp = dict_get(xattr, TIERING_MIGRATION_KEY); - if (tmp) - frame->root->pid = GF_CLIENT_PID_TIER_DEFRAG; - else - frame->root->pid = GF_CLIENT_PID_DEFRAG; - ret = dht_start_rebalance_task(this, frame); if (!ret) return 0; diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index 4f63e76aad..ae88969e70 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -2363,9 +2363,10 @@ rebalance_task(void *data) } static int -rebalance_task_completion(int op_ret, call_frame_t *sync_frame, void *data) +rebalance_task_completion(int op_ret, call_frame_t *syncop_frame, void *data) { int32_t op_errno = EINVAL; + call_frame_t *setxattr_frame = data; if (op_ret == -1) { /* Failure of migration process, mostly due to write process. 
@@ -2385,7 +2386,9 @@ rebalance_task_completion(int op_ret, call_frame_t *sync_frame, void *data) op_ret = -1; } - DHT_STACK_UNWIND(setxattr, sync_frame, op_ret, op_errno, NULL); + DHT_STACK_UNWIND(setxattr, setxattr_frame, op_ret, op_errno, NULL); + GF_ASSERT(syncop_frame->local == NULL); + STACK_DESTROY(syncop_frame->root); return 0; } @@ -2393,9 +2396,22 @@ int dht_start_rebalance_task(xlator_t *this, call_frame_t *frame) { int ret = -1; + call_frame_t *syncop_frame = NULL; + + syncop_frame = copy_frame(frame); + if (!syncop_frame) { + goto out; + } + + syncop_frame->root->pid = GF_CLIENT_PID_DEFRAG; + set_lk_owner_from_ptr(&syncop_frame->root->lk_owner, syncop_frame->root); ret = synctask_new(this->ctx->env, rebalance_task, - rebalance_task_completion, frame, frame); + rebalance_task_completion, syncop_frame, frame); +out: + if ((ret < 0) && syncop_frame) { + STACK_DESTROY(syncop_frame->root); + } return ret; } |