Author:    Ravishankar N <ravishankar@redhat.com>      2018-10-10 12:18:55 +0530
Committer: Amar Tumballi <amarts@redhat.com>           2018-10-10 16:18:55 +0000
commit:    196b32423af9a5088056fe10ea22d01494670931
tree:      4760872562d1e14d721dd6627378a204867f1be7
parent:    4da0d93f8703c7247fece9d9fb471742e6fd7c33
afr: prevent winding inodelks twice for arbiter volumes
Problem:
In an arbiter volume, if there is a pending data heal of a file only on the
arbiter brick, self-heal takes the inodelk twice due to a code bug but unlocks
it only once, leaving behind a stale lock on the brick. This causes the next
write to the file to hang.
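The scenario can be reproduced by hand, outside the test harness added below,
roughly as follows. This is a minimal illustrative sketch only: the volume
name, host, brick paths and mount point are made up, and the arbiter brick's
PID has to be looked up first (e.g. via gluster volume status).

    # illustrative reproduction of the stale-lock scenario (names are made up)
    gluster volume create r3 replica 3 arbiter 1 \
            host1:/bricks/r3-0 host1:/bricks/r3-1 host1:/bricks/r3-2 force
    gluster volume start r3
    mount -t glusterfs host1:/r3 /mnt/r3
    echo "Data" > /mnt/r3/FILE             # file exists on all three bricks
    kill -9 <arbiter-brick-pid>            # take down only the arbiter brick
    echo "arbiter down" >> /mnt/r3/FILE    # pending data heal now targets the arbiter
    gluster volume start r3 force          # bring the arbiter back up
    gluster volume heal r3                 # heal finishes but leaves a stale inodelk
    echo "next write" >> /mnt/r3/FILE      # without the fix, this write hangs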
Fix:
Fix the code bug so that the lock is taken only once. The bug was introduced in
master with commit eb472d82a083883335bc494b87ea175ac43471ff.
Thanks to Pranith Kumar K <pkarampu@redhat.com> for finding the RCA.
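As a side note on verifying the fix, a leftover lock of this kind should be
visible in a brick statedump. A hedged sketch, assuming the default statedump
directory /var/run/gluster and the illustrative volume name from the sketch
above; the dump file names vary by brick path and PID.

    gluster volume statedump r3
    grep -iA2 inodelk /var/run/gluster/*.dump.*
    # with the buggy code, one brick's dump keeps showing a granted inodelk on
    # FILE's inode even after 'gluster volume heal r3 info' reports no entries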
fixes: bz#1637802
Change-Id: I15ad969e10a6a3c4bd255e2948b6be6dcddc61e1
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Diffstat (limited to 'tests')
-rw-r--r--  tests/bugs/replicate/bug-1637802-arbiter-stale-data-heal-lock.t  44
1 file changed, 44 insertions, 0 deletions
diff --git a/tests/bugs/replicate/bug-1637802-arbiter-stale-data-heal-lock.t b/tests/bugs/replicate/bug-1637802-arbiter-stale-data-heal-lock.t
new file mode 100644
index 0000000000..91ed39beb9
--- /dev/null
+++ b/tests/bugs/replicate/bug-1637802-arbiter-stale-data-heal-lock.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+# Test to check that data self-heal does not leave any stale lock.
+
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0,1,2};
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+# Create base entry in indices/xattrop
+echo "Data" > $M0/FILE
+
+# Kill arbiter brick and write to FILE.
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo "arbiter down" >> $M0/FILE
+EXPECT 2 get_pending_heal_count $V0
+
+# Bring it back up and let heal complete.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# write to the FILE must succeed.
+echo "this must succeed" >> $M0/FILE
+TEST [ $? -eq 0 ]
+cleanup;
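Usage note (not part of the patch): from a built glusterfs source tree the new
test can be run on its own with prove, which is how the project's run-tests.sh
drives individual .t files.

    prove -vf tests/bugs/replicate/bug-1637802-arbiter-stale-data-heal-lock.t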