diff options
author | Pranith Kumar Karampuri <pranith.karampuri@phonepe.com> | 2020-10-29 09:52:20 +0530 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-10-29 09:52:20 +0530 |
commit | 02074cfe3b02f229f0c0ff468debf5e29adfdb62 (patch) | |
tree | f27f2b3c8a8f9de4922fb23276792577c7956c9e | |
parent | fd8546370a95c216df08cf2901705e6ba989fa16 (diff) | |
download | glusterfs-02074cfe3b02f229f0c0ff468debf5e29adfdb62.tar.gz glusterfs-02074cfe3b02f229f0c0ff468debf5e29adfdb62.tar.xz glusterfs-02074cfe3b02f229f0c0ff468debf5e29adfdb62.zip |
cluster/dht: Perform migrate-file with lk-owner (#1581)
* cluster/dht: Perform migrate-file with lk-owner
1) Added GF_ASSERT() calls in client-xlator to find these
issues sooner.
2) Fuse is setting zero-lkowner with len as 8 when the fop
doesn't have any lk-owner. Changed this to have len as 0
just as we have in fops triggered from xlators lower to
fuse.
* syncop: Avoid frame allocation if we can
* cluster/dht: Set lkowner in daemon rebalance code path
* cluster/afr: Set lkowner for ta-selfheal
* cluster/ec: Destroy frame after heal is done
* Don't assert for lk-owner in lk call
* set lkowner for mandatory lock heal tests
fixes: #1529
Change-Id: Ia803db6b00869316893abb1cf435b898eec31228
Signed-off-by: Pranith Kumar K <pranith.karampuri@phonepe.com>
-rw-r--r-- | libglusterfs/src/syncop.c | 6 | ||||
-rw-r--r-- | tests/basic/distribute/manual-rebalance.t | 81 | ||||
-rw-r--r-- | tests/basic/fencing/afr-lock-heal-advanced.c | 7 | ||||
-rw-r--r-- | tests/basic/fencing/afr-lock-heal-basic.c | 7 | ||||
-rw-r--r-- | tests/volume.rc | 4 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heald.c | 6 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 2 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 28 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-heal.c | 7 | ||||
-rw-r--r-- | xlators/mount/fuse/src/fuse-helpers.c | 3 | ||||
-rw-r--r-- | xlators/protocol/client/src/client.c | 4 |
11 files changed, 143 insertions, 12 deletions
diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c index df20cec559..d717faf5a4 100644 --- a/libglusterfs/src/syncop.c +++ b/libglusterfs/src/syncop.c @@ -358,7 +358,7 @@ synctask_destroy(struct synctask *task) GF_FREE(task->stack); - if (task->opframe) + if (task->opframe && (task->opframe != task->frame)) STACK_DESTROY(task->opframe->root); if (task->synccbk == NULL) { @@ -441,7 +441,7 @@ synctask_create(struct syncenv *env, size_t stacksize, synctask_fn_t fn, set_lk_owner_from_ptr(&newtask->opframe->root->lk_owner, newtask->opframe->root); } else { - newtask->opframe = copy_frame(frame); + newtask->opframe = frame; } if (!newtask->opframe) goto err; @@ -503,7 +503,7 @@ synctask_create(struct syncenv *env, size_t stacksize, synctask_fn_t fn, err: if (newtask) { GF_FREE(newtask->stack); - if (newtask->opframe) + if (newtask->opframe && (newtask->opframe != newtask->frame)) STACK_DESTROY(newtask->opframe->root); GF_FREE(newtask); } diff --git a/tests/basic/distribute/manual-rebalance.t b/tests/basic/distribute/manual-rebalance.t new file mode 100644 index 0000000000..299b9f16fa --- /dev/null +++ b/tests/basic/distribute/manual-rebalance.t @@ -0,0 +1,81 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +#This tests checks if the manual rebalance happens simialr to normal rebalance + +TESTS_EXPECTED_IN_LOOP=10 +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 $H0:$B0/${V0}0 +TEST $CLI volume set $V0 performance.quick-read off +TEST $CLI volume set $V0 performance.io-cache off +TEST $CLI volume set $V0 performance.write-behind off +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume set $V0 performance.read-ahead off +TEST $CLI volume start $V0 +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 +TEST mkdir $M0/d +declare -a checksums +for i in {1..10}; +do + TEST_IN_LOOP dd if=/dev/urandom of=$M0/d/$i bs=1M count=1 + checksums[$i]="$(md5sum $M0/d/$i | awk '{print $1}')" +done +TEST $CLI volume add-brick $V0 $H0:$B0/${V0}1 force +TEST $CLI volume rebalance $V0 fix-layout start +EXPECT_WITHIN $REBALANCE_TIMEOUT "fix-layout completed" fix-layout_status_field $V0 + +errors=0 +migrations=0 +for i in {1..10}; +do + setfattr -n trusted.distribute.migrate-data -v 1 $M0/d/$i 2>/dev/null + if [ $? -eq 0 ] #Migration happened for the file + then + if [ "${checksums[i]}" != "$(md5sum $B0/${V0}1/d/$i | awk '{print $1}')" ] + then + errors=$((errors+1)) #Data on new brick shouldn't change + else + migrations=$((migrations+1)) + fi + else #Migration is not applicable + if [ "${checksums[i]}" != "$(md5sum $B0/${V0}0/d/$i | awk '{print $1}')" ] + then + errors=$((errors+1)) #Data on old brick shouldn't change + fi + fi +done + +EXPECT_NOT "^0$" echo $migrations #At least one file should migrate +EXPECT "^0$" echo $errors + +#Test that rebalance crawl is equivalent to manual rebalance +TEST $CLI volume rebalance $V0 start +EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0 +EXPECT "^0$" rebalanced_files_field $V0 + + +#Do one final check that data didn't change after normal rebalance +success=0 +for i in {1..10} +do + if [ -f $B0/${V0}0/d/$i ] + then + if [ "${checksums[i]}" == "$(md5sum $B0/${V0}0/d/$i | awk '{print $1}')" ] + then + success=$((success+1)) + fi + else + if [ "${checksums[i]}" == "$(md5sum $B0/${V0}1/d/$i | awk '{print $1}')" ] + then + success=$((success+1)) + fi + fi +done + +EXPECT "^10$" echo $success +cleanup diff --git a/tests/basic/fencing/afr-lock-heal-advanced.c b/tests/basic/fencing/afr-lock-heal-advanced.c index e202ccd5b2..31feedddd2 100644 --- a/tests/basic/fencing/afr-lock-heal-advanced.c +++ b/tests/basic/fencing/afr-lock-heal-advanced.c @@ -97,6 +97,13 @@ acquire_mandatory_lock(glfs_t *fs, glfs_fd_t *fd) goto out; } + ret = glfs_fd_set_lkowner(fd, fd, sizeof(fd)); + if (ret) { + LOG_ERR("glfs_fd_set_lkowner", errno); + ret = -1; + goto out; + } + /* take a write mandatory lock */ ret = glfs_file_lock(fd, F_SETLKW, &lock, GLFS_LK_MANDATORY); if (ret) { diff --git a/tests/basic/fencing/afr-lock-heal-basic.c b/tests/basic/fencing/afr-lock-heal-basic.c index 768c9e5718..a0a55de450 100644 --- a/tests/basic/fencing/afr-lock-heal-basic.c +++ b/tests/basic/fencing/afr-lock-heal-basic.c @@ -101,6 +101,13 @@ acquire_mandatory_lock(glfs_t *fs, char *fname) pause(); + ret = glfs_fd_set_lkowner(fd, fd, sizeof(fd)); + if (ret) { + LOG_ERR("glfs_fd_set_lkowner", errno); + ret = -1; + goto out; + } + /* take a write mandatory lock */ ret = glfs_file_lock(fd, F_SETLKW, &lock, GLFS_LK_MANDATORY); if (ret) { diff --git a/tests/volume.rc b/tests/volume.rc index b38848c0e5..af80715a70 100644 --- a/tests/volume.rc +++ b/tests/volume.rc @@ -75,6 +75,10 @@ function rebalance_status_field { $CLI volume rebalance $1 status | awk '{print $7}' | sed -n 3p } +function rebalanced_files_field { + $CLI volume rebalance $1 status | awk '{print $2}' | sed -n 3p +} + function fix-layout_status_field { #The fix-layout status can be up to 3 words, (ex:'fix-layout in progress'), hence the awk-print $2 thru $4. #But if the status is less than 3 words, it also prints the next field i.e the run_time_in_secs.(ex:'completed 3.00'). diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index 109fd4b742..71209637f1 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -1024,11 +1024,17 @@ afr_shd_index_healer(void *data) loc_t loc = { 0, }; + gf_lkowner_t lkowner; + pid_t pid = GF_CLIENT_PID_SELF_HEALD; healer = data; THIS = this = healer->this; priv = this->private; + syncopctx_setfspid(&pid); + set_lk_owner_from_ptr(&lkowner, &lkowner); + syncopctx_setfslkowner(&lkowner); + for (;;) { afr_shd_healer_wait(healer); diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 8ba0cc4c73..10ae05db14 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -5872,8 +5872,6 @@ dht_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr, if (local->rebalance.target_node) { local->flags = forced_rebalance; - frame->root->pid = GF_CLIENT_PID_DEFRAG; - ret = dht_start_rebalance_task(this, frame); if (!ret) return 0; diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index be7f6a3cfc..00d3457f2d 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -2337,9 +2337,10 @@ rebalance_task(void *data) } static int -rebalance_task_completion(int op_ret, call_frame_t *sync_frame, void *data) +rebalance_task_completion(int op_ret, call_frame_t *syncop_frame, void *data) { int32_t op_errno = EINVAL; + call_frame_t *setxattr_frame = data; if (op_ret == -1) { /* Failure of migration process, mostly due to write process. @@ -2359,7 +2360,9 @@ rebalance_task_completion(int op_ret, call_frame_t *sync_frame, void *data) op_ret = -1; } - DHT_STACK_UNWIND(setxattr, sync_frame, op_ret, op_errno, NULL); + DHT_STACK_UNWIND(setxattr, setxattr_frame, op_ret, op_errno, NULL); + GF_ASSERT(syncop_frame->local == NULL); + STACK_DESTROY(syncop_frame->root); return 0; } @@ -2367,9 +2370,22 @@ int dht_start_rebalance_task(xlator_t *this, call_frame_t *frame) { int ret = -1; + call_frame_t *syncop_frame = NULL; + + syncop_frame = copy_frame(frame); + if (!syncop_frame) { + goto out; + } + + syncop_frame->root->pid = GF_CLIENT_PID_DEFRAG; + set_lk_owner_from_ptr(&syncop_frame->root->lk_owner, syncop_frame->root); ret = synctask_new(this->ctx->env, rebalance_task, - rebalance_task_completion, frame, frame); + rebalance_task_completion, syncop_frame, frame); +out: + if ((ret < 0) && syncop_frame) { + STACK_DESTROY(syncop_frame->root); + } return ret; } @@ -2908,6 +2924,7 @@ gf_defrag_task(void *opaque) gf_defrag_info_t *defrag = NULL; int ret = 0; pid_t pid = GF_CLIENT_PID_DEFRAG; + gf_lkowner_t lkowner; defrag = (gf_defrag_info_t *)opaque; if (!defrag) { @@ -2916,6 +2933,11 @@ gf_defrag_task(void *opaque) } syncopctx_setfspid(&pid); + /* setting lkowner stack memory address as lkowner + * which will be unique per thread*/ + set_lk_owner_from_ptr(&lkowner, &lkowner); + syncopctx_setfslkowner(&lkowner); + q_head = &(defrag->queue[0].list); diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c index 7d991f04aa..2226334227 100644 --- a/xlators/cluster/ec/src/ec-heal.c +++ b/xlators/cluster/ec/src/ec-heal.c @@ -2698,6 +2698,8 @@ ec_heal_done(int ret, call_frame_t *heal, void *opaque) { if (opaque) ec_fop_data_release(opaque); + if (heal) + STACK_DESTROY(heal->root); return 0; } @@ -2759,10 +2761,9 @@ out: if (ret < 0) { ec_fop_set_error(fop, ENOMEM); ec_heal_fail(ec, fop); + if (frame) + STACK_DESTROY(frame->root); } - - if (frame) - STACK_DESTROY(frame->root); } void diff --git a/xlators/mount/fuse/src/fuse-helpers.c b/xlators/mount/fuse/src/fuse-helpers.c index a2b0ad11fe..eaa71f8189 100644 --- a/xlators/mount/fuse/src/fuse-helpers.c +++ b/xlators/mount/fuse/src/fuse-helpers.c @@ -383,7 +383,8 @@ get_call_frame_for_req(fuse_state_t *state) frame->root->uid = finh->uid; frame->root->gid = finh->gid; frame->root->pid = finh->pid; - set_lk_owner_from_uint64(&frame->root->lk_owner, state->lk_owner); + if (state->lk_owner) + set_lk_owner_from_uint64(&frame->root->lk_owner, state->lk_owner); } get_groups(priv, frame); diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c index 0f31fea951..00c5dced1c 100644 --- a/xlators/protocol/client/src/client.c +++ b/xlators/protocol/client/src/client.c @@ -1533,6 +1533,7 @@ client_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, if (!conf || !conf->fops) goto out; + GF_ASSERT(!is_lk_owner_null(&frame->root->lk_owner)); proc = &conf->fops->proctable[GF_FOP_INODELK]; if (proc->fn) { args.loc = loc; @@ -1564,6 +1565,7 @@ client_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, if (!conf || !conf->fops) goto out; + GF_ASSERT(!is_lk_owner_null(&frame->root->lk_owner)); proc = &conf->fops->proctable[GF_FOP_FINODELK]; if (proc->fn) { args.fd = fd; @@ -1596,6 +1598,7 @@ client_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, if (!conf || !conf->fops) goto out; + GF_ASSERT(!is_lk_owner_null(&frame->root->lk_owner)); proc = &conf->fops->proctable[GF_FOP_ENTRYLK]; if (proc->fn) { args.loc = loc; @@ -1629,6 +1632,7 @@ client_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, if (!conf || !conf->fops) goto out; + GF_ASSERT(!is_lk_owner_null(&frame->root->lk_owner)); proc = &conf->fops->proctable[GF_FOP_FENTRYLK]; if (proc->fn) { args.fd = fd; |