summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPranith Kumar Karampuri <pranith.karampuri@phonepe.com>2020-10-29 09:52:20 +0530
committerGitHub <noreply@github.com>2020-10-29 09:52:20 +0530
commit02074cfe3b02f229f0c0ff468debf5e29adfdb62 (patch)
treef27f2b3c8a8f9de4922fb23276792577c7956c9e
parentfd8546370a95c216df08cf2901705e6ba989fa16 (diff)
downloadglusterfs-02074cfe3b02f229f0c0ff468debf5e29adfdb62.tar.gz
glusterfs-02074cfe3b02f229f0c0ff468debf5e29adfdb62.tar.xz
glusterfs-02074cfe3b02f229f0c0ff468debf5e29adfdb62.zip
cluster/dht: Perform migrate-file with lk-owner (#1581)
* cluster/dht: Perform migrate-file with lk-owner 1) Added GF_ASSERT() calls in client-xlator to find these issues sooner. 2) Fuse is setting zero-lkowner with len as 8 when the fop doesn't have any lk-owner. Changed this to have len as 0 just as we have in fops triggered from xlators lower to fuse. * syncop: Avoid frame allocation if we can * cluster/dht: Set lkowner in daemon rebalance code path * cluster/afr: Set lkowner for ta-selfheal * cluster/ec: Destroy frame after heal is done * Don't assert for lk-owner in lk call * set lkowner for mandatory lock heal tests fixes: #1529 Change-Id: Ia803db6b00869316893abb1cf435b898eec31228 Signed-off-by: Pranith Kumar K <pranith.karampuri@phonepe.com>
-rw-r--r--libglusterfs/src/syncop.c6
-rw-r--r--tests/basic/distribute/manual-rebalance.t81
-rw-r--r--tests/basic/fencing/afr-lock-heal-advanced.c7
-rw-r--r--tests/basic/fencing/afr-lock-heal-basic.c7
-rw-r--r--tests/volume.rc4
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.c6
-rw-r--r--xlators/cluster/dht/src/dht-common.c2
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c28
-rw-r--r--xlators/cluster/ec/src/ec-heal.c7
-rw-r--r--xlators/mount/fuse/src/fuse-helpers.c3
-rw-r--r--xlators/protocol/client/src/client.c4
11 files changed, 143 insertions, 12 deletions
diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c
index df20cec559..d717faf5a4 100644
--- a/libglusterfs/src/syncop.c
+++ b/libglusterfs/src/syncop.c
@@ -358,7 +358,7 @@ synctask_destroy(struct synctask *task)
GF_FREE(task->stack);
- if (task->opframe)
+ if (task->opframe && (task->opframe != task->frame))
STACK_DESTROY(task->opframe->root);
if (task->synccbk == NULL) {
@@ -441,7 +441,7 @@ synctask_create(struct syncenv *env, size_t stacksize, synctask_fn_t fn,
set_lk_owner_from_ptr(&newtask->opframe->root->lk_owner,
newtask->opframe->root);
} else {
- newtask->opframe = copy_frame(frame);
+ newtask->opframe = frame;
}
if (!newtask->opframe)
goto err;
@@ -503,7 +503,7 @@ synctask_create(struct syncenv *env, size_t stacksize, synctask_fn_t fn,
err:
if (newtask) {
GF_FREE(newtask->stack);
- if (newtask->opframe)
+ if (newtask->opframe && (newtask->opframe != newtask->frame))
STACK_DESTROY(newtask->opframe->root);
GF_FREE(newtask);
}
diff --git a/tests/basic/distribute/manual-rebalance.t b/tests/basic/distribute/manual-rebalance.t
new file mode 100644
index 0000000000..299b9f16fa
--- /dev/null
+++ b/tests/basic/distribute/manual-rebalance.t
@@ -0,0 +1,81 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#This tests checks if the manual rebalance happens simialr to normal rebalance
+
+TESTS_EXPECTED_IN_LOOP=10
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST mkdir $M0/d
+declare -a checksums
+for i in {1..10};
+do
+ TEST_IN_LOOP dd if=/dev/urandom of=$M0/d/$i bs=1M count=1
+ checksums[$i]="$(md5sum $M0/d/$i | awk '{print $1}')"
+done
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}1 force
+TEST $CLI volume rebalance $V0 fix-layout start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "fix-layout completed" fix-layout_status_field $V0
+
+errors=0
+migrations=0
+for i in {1..10};
+do
+ setfattr -n trusted.distribute.migrate-data -v 1 $M0/d/$i 2>/dev/null
+ if [ $? -eq 0 ] #Migration happened for the file
+ then
+ if [ "${checksums[i]}" != "$(md5sum $B0/${V0}1/d/$i | awk '{print $1}')" ]
+ then
+ errors=$((errors+1)) #Data on new brick shouldn't change
+ else
+ migrations=$((migrations+1))
+ fi
+ else #Migration is not applicable
+ if [ "${checksums[i]}" != "$(md5sum $B0/${V0}0/d/$i | awk '{print $1}')" ]
+ then
+ errors=$((errors+1)) #Data on old brick shouldn't change
+ fi
+ fi
+done
+
+EXPECT_NOT "^0$" echo $migrations #At least one file should migrate
+EXPECT "^0$" echo $errors
+
+#Test that rebalance crawl is equivalent to manual rebalance
+TEST $CLI volume rebalance $V0 start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+EXPECT "^0$" rebalanced_files_field $V0
+
+
+#Do one final check that data didn't change after normal rebalance
+success=0
+for i in {1..10}
+do
+ if [ -f $B0/${V0}0/d/$i ]
+ then
+ if [ "${checksums[i]}" == "$(md5sum $B0/${V0}0/d/$i | awk '{print $1}')" ]
+ then
+ success=$((success+1))
+ fi
+ else
+ if [ "${checksums[i]}" == "$(md5sum $B0/${V0}1/d/$i | awk '{print $1}')" ]
+ then
+ success=$((success+1))
+ fi
+ fi
+done
+
+EXPECT "^10$" echo $success
+cleanup
diff --git a/tests/basic/fencing/afr-lock-heal-advanced.c b/tests/basic/fencing/afr-lock-heal-advanced.c
index e202ccd5b2..31feedddd2 100644
--- a/tests/basic/fencing/afr-lock-heal-advanced.c
+++ b/tests/basic/fencing/afr-lock-heal-advanced.c
@@ -97,6 +97,13 @@ acquire_mandatory_lock(glfs_t *fs, glfs_fd_t *fd)
goto out;
}
+ ret = glfs_fd_set_lkowner(fd, fd, sizeof(fd));
+ if (ret) {
+ LOG_ERR("glfs_fd_set_lkowner", errno);
+ ret = -1;
+ goto out;
+ }
+
/* take a write mandatory lock */
ret = glfs_file_lock(fd, F_SETLKW, &lock, GLFS_LK_MANDATORY);
if (ret) {
diff --git a/tests/basic/fencing/afr-lock-heal-basic.c b/tests/basic/fencing/afr-lock-heal-basic.c
index 768c9e5718..a0a55de450 100644
--- a/tests/basic/fencing/afr-lock-heal-basic.c
+++ b/tests/basic/fencing/afr-lock-heal-basic.c
@@ -101,6 +101,13 @@ acquire_mandatory_lock(glfs_t *fs, char *fname)
pause();
+ ret = glfs_fd_set_lkowner(fd, fd, sizeof(fd));
+ if (ret) {
+ LOG_ERR("glfs_fd_set_lkowner", errno);
+ ret = -1;
+ goto out;
+ }
+
/* take a write mandatory lock */
ret = glfs_file_lock(fd, F_SETLKW, &lock, GLFS_LK_MANDATORY);
if (ret) {
diff --git a/tests/volume.rc b/tests/volume.rc
index b38848c0e5..af80715a70 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -75,6 +75,10 @@ function rebalance_status_field {
$CLI volume rebalance $1 status | awk '{print $7}' | sed -n 3p
}
+function rebalanced_files_field {
+ $CLI volume rebalance $1 status | awk '{print $2}' | sed -n 3p
+}
+
function fix-layout_status_field {
#The fix-layout status can be up to 3 words, (ex:'fix-layout in progress'), hence the awk-print $2 thru $4.
#But if the status is less than 3 words, it also prints the next field i.e the run_time_in_secs.(ex:'completed 3.00').
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index 109fd4b742..71209637f1 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -1024,11 +1024,17 @@ afr_shd_index_healer(void *data)
loc_t loc = {
0,
};
+ gf_lkowner_t lkowner;
+ pid_t pid = GF_CLIENT_PID_SELF_HEALD;
healer = data;
THIS = this = healer->this;
priv = this->private;
+ syncopctx_setfspid(&pid);
+ set_lk_owner_from_ptr(&lkowner, &lkowner);
+ syncopctx_setfslkowner(&lkowner);
+
for (;;) {
afr_shd_healer_wait(healer);
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 8ba0cc4c73..10ae05db14 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -5872,8 +5872,6 @@ dht_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr,
if (local->rebalance.target_node) {
local->flags = forced_rebalance;
- frame->root->pid = GF_CLIENT_PID_DEFRAG;
-
ret = dht_start_rebalance_task(this, frame);
if (!ret)
return 0;
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index be7f6a3cfc..00d3457f2d 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -2337,9 +2337,10 @@ rebalance_task(void *data)
}
static int
-rebalance_task_completion(int op_ret, call_frame_t *sync_frame, void *data)
+rebalance_task_completion(int op_ret, call_frame_t *syncop_frame, void *data)
{
int32_t op_errno = EINVAL;
+ call_frame_t *setxattr_frame = data;
if (op_ret == -1) {
/* Failure of migration process, mostly due to write process.
@@ -2359,7 +2360,9 @@ rebalance_task_completion(int op_ret, call_frame_t *sync_frame, void *data)
op_ret = -1;
}
- DHT_STACK_UNWIND(setxattr, sync_frame, op_ret, op_errno, NULL);
+ DHT_STACK_UNWIND(setxattr, setxattr_frame, op_ret, op_errno, NULL);
+ GF_ASSERT(syncop_frame->local == NULL);
+ STACK_DESTROY(syncop_frame->root);
return 0;
}
@@ -2367,9 +2370,22 @@ int
dht_start_rebalance_task(xlator_t *this, call_frame_t *frame)
{
int ret = -1;
+ call_frame_t *syncop_frame = NULL;
+
+ syncop_frame = copy_frame(frame);
+ if (!syncop_frame) {
+ goto out;
+ }
+
+ syncop_frame->root->pid = GF_CLIENT_PID_DEFRAG;
+ set_lk_owner_from_ptr(&syncop_frame->root->lk_owner, syncop_frame->root);
ret = synctask_new(this->ctx->env, rebalance_task,
- rebalance_task_completion, frame, frame);
+ rebalance_task_completion, syncop_frame, frame);
+out:
+ if ((ret < 0) && syncop_frame) {
+ STACK_DESTROY(syncop_frame->root);
+ }
return ret;
}
@@ -2908,6 +2924,7 @@ gf_defrag_task(void *opaque)
gf_defrag_info_t *defrag = NULL;
int ret = 0;
pid_t pid = GF_CLIENT_PID_DEFRAG;
+ gf_lkowner_t lkowner;
defrag = (gf_defrag_info_t *)opaque;
if (!defrag) {
@@ -2916,6 +2933,11 @@ gf_defrag_task(void *opaque)
}
syncopctx_setfspid(&pid);
+ /* setting lkowner stack memory address as lkowner
+ * which will be unique per thread*/
+ set_lk_owner_from_ptr(&lkowner, &lkowner);
+ syncopctx_setfslkowner(&lkowner);
+
q_head = &(defrag->queue[0].list);
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
index 7d991f04aa..2226334227 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -2698,6 +2698,8 @@ ec_heal_done(int ret, call_frame_t *heal, void *opaque)
{
if (opaque)
ec_fop_data_release(opaque);
+ if (heal)
+ STACK_DESTROY(heal->root);
return 0;
}
@@ -2759,10 +2761,9 @@ out:
if (ret < 0) {
ec_fop_set_error(fop, ENOMEM);
ec_heal_fail(ec, fop);
+ if (frame)
+ STACK_DESTROY(frame->root);
}
-
- if (frame)
- STACK_DESTROY(frame->root);
}
void
diff --git a/xlators/mount/fuse/src/fuse-helpers.c b/xlators/mount/fuse/src/fuse-helpers.c
index a2b0ad11fe..eaa71f8189 100644
--- a/xlators/mount/fuse/src/fuse-helpers.c
+++ b/xlators/mount/fuse/src/fuse-helpers.c
@@ -383,7 +383,8 @@ get_call_frame_for_req(fuse_state_t *state)
frame->root->uid = finh->uid;
frame->root->gid = finh->gid;
frame->root->pid = finh->pid;
- set_lk_owner_from_uint64(&frame->root->lk_owner, state->lk_owner);
+ if (state->lk_owner)
+ set_lk_owner_from_uint64(&frame->root->lk_owner, state->lk_owner);
}
get_groups(priv, frame);
diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
index 0f31fea951..00c5dced1c 100644
--- a/xlators/protocol/client/src/client.c
+++ b/xlators/protocol/client/src/client.c
@@ -1533,6 +1533,7 @@ client_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
if (!conf || !conf->fops)
goto out;
+ GF_ASSERT(!is_lk_owner_null(&frame->root->lk_owner));
proc = &conf->fops->proctable[GF_FOP_INODELK];
if (proc->fn) {
args.loc = loc;
@@ -1564,6 +1565,7 @@ client_finodelk(call_frame_t *frame, xlator_t *this, const char *volume,
if (!conf || !conf->fops)
goto out;
+ GF_ASSERT(!is_lk_owner_null(&frame->root->lk_owner));
proc = &conf->fops->proctable[GF_FOP_FINODELK];
if (proc->fn) {
args.fd = fd;
@@ -1596,6 +1598,7 @@ client_entrylk(call_frame_t *frame, xlator_t *this, const char *volume,
if (!conf || !conf->fops)
goto out;
+ GF_ASSERT(!is_lk_owner_null(&frame->root->lk_owner));
proc = &conf->fops->proctable[GF_FOP_ENTRYLK];
if (proc->fn) {
args.loc = loc;
@@ -1629,6 +1632,7 @@ client_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume,
if (!conf || !conf->fops)
goto out;
+ GF_ASSERT(!is_lk_owner_null(&frame->root->lk_owner));
proc = &conf->fops->proctable[GF_FOP_FENTRYLK];
if (proc->fn) {
args.fd = fd;