summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPranith Kumar Karampuri <pranith.karampuri@phonepe.com>2021-03-10 10:43:24 +0530
committerGitHub <noreply@github.com>2021-03-10 10:43:24 +0530
commit46949c4951eb1d2eb0a90c21db66c31e444bffe8 (patch)
tree7bb6efd5f605551a7872b462caef8530c05adc90
parentdc9bab7959b068617ef00f355c63bdca060b9605 (diff)
downloadglusterfs-46949c4951eb1d2eb0a90c21db66c31e444bffe8.tar.gz
glusterfs-46949c4951eb1d2eb0a90c21db66c31e444bffe8.tar.xz
glusterfs-46949c4951eb1d2eb0a90c21db66c31e444bffe8.zip
features/index: Optimize link-count fetching code path (#1789)
* features/index: Optimize link-count fetching code path Problem: AFR requests 'link-count' in lookup to check if there are any pending heals. Based on this information, AFR will set dirent->inode to NULL in readdirp when heals are ongoing to prevent serving bad data. When heals are completed, the link-count request leads to an opendir of the xattrop directory and a read of its contents on every lookup, just to figure out that no healing is needed. This was not detected until this GitHub issue, where ZFS, which in some cases can lead to very slow readdir() calls, exposed it. Since GlusterFS does a lot of lookups, this was slowing down all operations and increasing load on the system. Code problem: on any xattrop operation the index xlator adds an index to the relevant dirs and, after the xattrop operation is done, deletes/keeps the index in that directory based on the value fetched in xattrop from posix. AFR sends all-zero xattrop for changelog xattrs. This leads to priv->pending_count manipulation which sets the count back to -1. The next lookup operation then triggers opendir/readdir to find the actual link-count, because the in-memory priv->pending_count is negative. Fix: 1) Don't add to index on all-zero xattrop for a key. 2) Set pending-count to -1 when the first gfid is added into xattrop directory, so that the next lookup can compute the link-count. fixes: #1764 Change-Id: I8a02c7e811a72c46d78ddb2d9d4fdc2222a444e9 Signed-off-by: Pranith Kumar K <pranith.karampuri@phonepe.com> * addressed comments Change-Id: Ide42bb1c1237b525d168bf1a9b82eb1bdc3bc283 Signed-off-by: Pranith Kumar K <pranith.karampuri@phonepe.com> * tests: Handle base index absence Change-Id: I3cf11a8644ccf23e01537228766f864b63c49556 Signed-off-by: Pranith Kumar K <pranith.karampuri@phonepe.com> * Addressed LOCK based comments, .t comments Change-Id: I5f53e40820cade3a44259c1ac1a7f3c5f2f0f310 Signed-off-by: Pranith Kumar K <pranith.karampuri@phonepe.com>
-rw-r--r--tests/afr.rc18
-rw-r--r--tests/basic/afr/bug-1493415-gfid-heal-non-granular.t2
-rw-r--r--tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t10
-rw-r--r--tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t5
-rw-r--r--tests/basic/afr/data-self-heal.t22
-rw-r--r--tests/basic/afr/entry-self-heal-anon-dir-off.t8
-rw-r--r--tests/basic/afr/entry-self-heal.t8
-rw-r--r--tests/bugs/replicate/bug-1101647.t11
-rw-r--r--tests/bugs/replicate/bug-1493415-gfid-heal.t5
-rw-r--r--tests/bugs/replicate/bug-1626994-info-split-brain.t13
-rw-r--r--tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t10
-rw-r--r--tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t5
-rw-r--r--tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t12
-rw-r--r--tests/bugs/replicate/mdata-heal-no-xattrs.t6
-rw-r--r--tests/features/index/index-link-count-lifecycle.t96
-rw-r--r--tests/volume.rc13
-rw-r--r--xlators/features/index/src/index.c77
17 files changed, 209 insertions, 112 deletions
diff --git a/tests/afr.rc b/tests/afr.rc
index 241789903b..d73e37dae5 100644
--- a/tests/afr.rc
+++ b/tests/afr.rc
@@ -7,11 +7,27 @@ function create_brick_xattrop_entry {
local params=`echo "$@" | cut -d' ' -f2-`
echo $params
+ local exit_val=0 #kept local so it cannot clobber a caller's variable
for file in $params
do
gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $1/$file))
- ln $xattrop_dir/$base_entry $xattrop_dir/$gfid_str
+ if [ -z "$base_entry" ]; #no base index file present: create the entry directly
+ then
+ touch $xattrop_dir/$gfid_str
+ else
+ ln $xattrop_dir/$base_entry $xattrop_dir/$gfid_str
+ fi
+
+ if [ $? -ne 0 ]; #status of the touch/ln that just ran
+ then
+ exit_val=1
+ fi
done
+
+ if [ $exit_val -eq 1 ]; #report failure if any entry could not be created
+ then
+ false
+ fi
}
function diff_dirs {
diff --git a/tests/basic/afr/bug-1493415-gfid-heal-non-granular.t b/tests/basic/afr/bug-1493415-gfid-heal-non-granular.t
index aff001cb70..586cf50481 100644
--- a/tests/basic/afr/bug-1493415-gfid-heal-non-granular.t
+++ b/tests/basic/afr/bug-1493415-gfid-heal-non-granular.t
@@ -56,7 +56,7 @@ TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2
#Now simulate setting of pending entry xattr on parent dir of 1st brick.
TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/dir
-create_brick_xattrop_entry $B0/${V0}0 dir
+TEST create_brick_xattrop_entry $B0/${V0}0 dir
# storage/posix considers that a file without gfid changed less than a second
# before doesn't exist, so we need to wait for a second to force posix to
diff --git a/tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t b/tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t
index 9079c93039..de3e460d0e 100644
--- a/tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t
+++ b/tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t
@@ -33,10 +33,7 @@ setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/di
setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
# Add entry to xattrop dir to trigger index heal.
-xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
-base_entry_b0=`ls $xattrop_dir0`
-gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
-ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
+TEST create_brick_xattrop_entry $B0/${V0}0 dir
EXPECT "^1$" get_pending_heal_count $V0
# Remove the gfid xattr and the link file on one brick.
@@ -79,10 +76,7 @@ setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/di
setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
# Add entry to xattrop dir to trigger index heal.
-xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
-base_entry_b0=`ls $xattrop_dir0`
-gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
-ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
+TEST create_brick_xattrop_entry $B0/${V0}0 dir
EXPECT "^1$" get_pending_heal_count $V0
# Remove the gfid xattr and the link file on two bricks.
diff --git a/tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t b/tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t
index 4f27da4ad3..decedd03db 100644
--- a/tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t
+++ b/tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t
@@ -64,10 +64,7 @@ setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/di
setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
# Add entry to xattrop dir on first brick.
-xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
-base_entry_b0=`ls $xattrop_dir0`
-gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
-TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
+TEST create_brick_xattrop_entry $B0/$V0"0" dir
EXPECT "^1$" get_pending_heal_count $V0
diff --git a/tests/basic/afr/data-self-heal.t b/tests/basic/afr/data-self-heal.t
index 0f417b4a0b..390b414f52 100644
--- a/tests/basic/afr/data-self-heal.t
+++ b/tests/basic/afr/data-self-heal.t
@@ -7,18 +7,7 @@
cleanup;
function create_xattrop_entry {
- local xattrop_dir0=$(afr_get_index_path $B0/brick0)
- local xattrop_dir1=$(afr_get_index_path $B0/brick1)
- local base_entry_b0=`ls $xattrop_dir0`
- local base_entry_b1=`ls $xattrop_dir1`
- local gfid_str
-
- for file in "$@"
- do
- gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/brick0/$file))
- ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
- ln $xattrop_dir1/$base_entry_b1 $xattrop_dir1/$gfid_str
- done
+ create_brick_xattrop_entry $B0/brick0 "$@" && create_brick_xattrop_entry $B0/brick1 "$@" || exit 1
}
function is_heal_done {
@@ -154,15 +143,8 @@ TEST "echo def > $B0/brick1/v1-dirty.txt"
TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000200000000000000000 $B0/brick0/v1-dirty.txt
TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000100000000000000000 $B0/brick1/v1-dirty.txt
-#Create base entry in indices/xattrop
-echo "Data" > $M0/FILE
-rm -f $M0/FILE
-EXPECT "1" count_index_entries $B0/brick0
-EXPECT "1" count_index_entries $B0/brick1
-cd -
-
#Create gfid hard links for all files before triggering index heals.
-create_xattrop_entry pending-changelog biggest-file-source.txt biggest-file-more-prio-than-changelog.txt same-size-more-prio-to-changelog.txt size-and-witness-same.txt self-accusing-vs-source.txt self-accusing-both.txt self-accusing-vs-innocent.txt self-accusing-bigger-exists.txt size-more-prio-than-self-accused.txt v1-dirty.txt
+TEST create_xattrop_entry pending-changelog biggest-file-source.txt biggest-file-more-prio-than-changelog.txt same-size-more-prio-to-changelog.txt size-and-witness-same.txt self-accusing-vs-source.txt self-accusing-both.txt self-accusing-vs-innocent.txt self-accusing-bigger-exists.txt size-more-prio-than-self-accused.txt v1-dirty.txt
TEST $CLI volume heal $V0
EXPECT_WITHIN $HEAL_TIMEOUT "~" print_pending_heals pending-changelog biggest-file-source.txt biggest-file-more-prio-than-changelog.txt same-size-more-prio-to-changelog.txt size-and-witness-same.txt self-accusing-vs-source.txt self-accusing-both.txt self-accusing-vs-innocent.txt self-accusing-bigger-exists.txt size-more-prio-than-self-accused.txt v1-dirty.txt
diff --git a/tests/basic/afr/entry-self-heal-anon-dir-off.t b/tests/basic/afr/entry-self-heal-anon-dir-off.t
index 7bb6ee1419..c6caa4d028 100644
--- a/tests/basic/afr/entry-self-heal-anon-dir-off.t
+++ b/tests/basic/afr/entry-self-heal-anon-dir-off.t
@@ -316,16 +316,10 @@ $CLI volume start $V0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
-#Create base entry in indices/xattrop
-echo "Data" > $M0/FILE
-rm -f $M0/FILE
-EXPECT "1" count_index_entries $B0/${V0}0
-EXPECT "1" count_index_entries $B0/${V0}1
-
TEST $CLI volume stop $V0;
#Create entries for fool_heal and fool_me to ensure they are fully healed and dirty xattrs erased, before triggering index heal
-create_brick_xattrop_entry $B0/${V0}0 fool_heal fool_me source_creations_heal/dir1
+TEST create_brick_xattrop_entry $B0/${V0}0 fool_heal fool_me source_creations_heal/dir1
$CLI volume start $V0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
diff --git a/tests/basic/afr/entry-self-heal.t b/tests/basic/afr/entry-self-heal.t
index 0c1da7d211..6eadbf1d80 100644
--- a/tests/basic/afr/entry-self-heal.t
+++ b/tests/basic/afr/entry-self-heal.t
@@ -316,16 +316,10 @@ $CLI volume start $V0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
-#Create base entry in indices/xattrop
-echo "Data" > $M0/FILE
-rm -f $M0/FILE
-EXPECT "1" count_index_entries $B0/${V0}0
-EXPECT "1" count_index_entries $B0/${V0}1
-
TEST $CLI volume stop $V0;
#Create entries for fool_heal and fool_me to ensure they are fully healed and dirty xattrs erased, before triggering index heal
-create_brick_xattrop_entry $B0/${V0}0 fool_heal fool_me source_creations_heal/dir1
+TEST create_brick_xattrop_entry $B0/${V0}0 fool_heal fool_me source_creations_heal/dir1
$CLI volume start $V0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
diff --git a/tests/bugs/replicate/bug-1101647.t b/tests/bugs/replicate/bug-1101647.t
index 708bc1a1e2..998794e086 100644
--- a/tests/bugs/replicate/bug-1101647.t
+++ b/tests/bugs/replicate/bug-1101647.t
@@ -11,17 +11,20 @@ TEST pidof glusterd
TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
TEST $CLI volume start $V0;
TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST kill_brick $V0 $H0 $B0/${V0}2
+#Create base entry in indices/xattrop
+echo "Data">$M0/file
+EXPECT 3 count_index_entries $B0/$V0"1"
+TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
-#Create base entry in indices/xattrop
-echo "Data">$M0/file
-TEST $CLI volume heal $V0
#Entries from indices/xattrop should not be cleared after a heal.
EXPECT 1 count_index_entries $B0/$V0"1"
-EXPECT 1 count_index_entries $B0/$V0"2"
TEST kill_brick $V0 $H0 $B0/${V0}2
echo "More data">>$M0/file
diff --git a/tests/bugs/replicate/bug-1493415-gfid-heal.t b/tests/bugs/replicate/bug-1493415-gfid-heal.t
index eceb289104..20ce4ee1dd 100644
--- a/tests/bugs/replicate/bug-1493415-gfid-heal.t
+++ b/tests/bugs/replicate/bug-1493415-gfid-heal.t
@@ -14,9 +14,6 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
TEST $CLI volume set $V0 self-heal-daemon off
-# Create base entry in indices/xattrop
-echo "Data" > $M0/FILE
-
#------------------------------------------------------------------------------#
TEST touch $M0/f1
gfid_f1=$(gf_get_gfid_xattr $B0/${V0}0/f1)
@@ -55,7 +52,7 @@ TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2
#Now simulate setting of pending entry xattr on parent dir of 1st brick.
TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000001 $B0/${V0}0/dir
-create_brick_xattrop_entry $B0/${V0}0 dir
+TEST create_brick_xattrop_entry $B0/${V0}0 dir
# storage/posix considers that a file without gfid changed less than a second
# before doesn't exist, so we need to wait for a second to force posix to
diff --git a/tests/bugs/replicate/bug-1626994-info-split-brain.t b/tests/bugs/replicate/bug-1626994-info-split-brain.t
index 86bfecb1a9..79a29e6f47 100644
--- a/tests/bugs/replicate/bug-1626994-info-split-brain.t
+++ b/tests/bugs/replicate/bug-1626994-info-split-brain.t
@@ -22,13 +22,6 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
-# Create base entry in indices/xattrop
-echo "Data" > $M0/FILE
-rm -f $M0/FILE
-EXPECT "1" count_index_entries $B0/${V0}0
-EXPECT "1" count_index_entries $B0/${V0}1
-EXPECT "1" count_index_entries $B0/${V0}2
-
TEST mkdir $M0/dirty_dir
TEST mkdir $M0/pending_dir
@@ -37,7 +30,7 @@ TEST mkdir $M0/pending_dir
TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}0/dirty_dir
TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/dirty_dir
TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}2/dirty_dir
-create_brick_xattrop_entry $B0/${V0}0 dirty_dir
+TEST create_brick_xattrop_entry $B0/${V0}0 dirty_dir
# Should not show up as split-brain.
EXPECT "0" afr_get_split_brain_count $V0
@@ -46,7 +39,7 @@ EXPECT "0" afr_get_split_brain_count $V0
TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/${V0}0
TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/${V0}1
TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}2
-create_brick_xattrop_entry $B0/${V0}0 "/"
+TEST create_brick_xattrop_entry $B0/${V0}0 "/"
# Should not show up as split-brain.
EXPECT "0" afr_get_split_brain_count $V0
@@ -55,7 +48,7 @@ EXPECT "0" afr_get_split_brain_count $V0
TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/pending_dir
TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/${V0}1/pending_dir
TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}2/pending_dir
-create_brick_xattrop_entry $B0/${V0}0 pending_dir
+TEST create_brick_xattrop_entry $B0/${V0}0 pending_dir
# Should show up as split-brain.
EXPECT "1" afr_get_split_brain_count $V0
diff --git a/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
index 1fdf7ea2da..0bd2b48cde 100644
--- a/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
+++ b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
@@ -34,10 +34,7 @@ setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000001 $B0/$V0"0"/di
setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000001 $B0/$V0"0"/dir
# Add entry to xattrop dir to trigger index heal.
-xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
-base_entry_b0=`ls $xattrop_dir0`
-gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
-ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
+TEST create_brick_xattrop_entry $B0/$V0"0" dir
EXPECT "^1$" get_pending_heal_count $V0
# Remove the gfid xattr and the link file on one brick.
@@ -82,10 +79,7 @@ setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000001 $B0/$V0"0"/di
setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000001 $B0/$V0"0"/dir
# Add entry to xattrop dir to trigger index heal.
-xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
-base_entry_b0=`ls $xattrop_dir0`
-gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
-ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
+TEST create_brick_xattrop_entry $B0/$V0"0" dir
EXPECT "^1$" get_pending_heal_count $V0
# Remove the gfid xattr and the link file on two bricks.
diff --git a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
index 3da873a922..48e46e117e 100644
--- a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
+++ b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
@@ -66,10 +66,7 @@ setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000001 $B0/$V0"2"/di
setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000001 $B0/$V0"0"/dir
# Add entry to xattrop dir on first brick.
-xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
-base_entry_b0=`ls $xattrop_dir0`
-gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
-TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
+TEST create_brick_xattrop_entry $B0/$V0"0" dir
EXPECT "^1$" get_pending_heal_count $V0
diff --git a/tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t b/tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t
index c1bdf34ee6..52cb5214cb 100644
--- a/tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t
+++ b/tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t
@@ -79,21 +79,11 @@ TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000100000000 $B0/${V0
#-------------------------------------------------------------------------------
#Add entry to xattrop dir on first brick and check for split-brain.
-xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
-base_entry_b0=`ls $xattrop_dir0`
-
-gfid_f1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/file1))
-TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f1
gfid_f2_shard1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/.shard/$gfid_f2.1))
-TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f2_shard1
gfid_f3=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/file3))
-gfid_f3_shard1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/.shard/$gfid_f3.1))
-TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f3_shard1
-
-gfid_f4_shard1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/.shard/$gfid_f4.1))
-TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f4_shard1
+TEST create_brick_xattrop_entry $B0/$V0"0" file1 .shard/$gfid_f2.1 .shard/$gfid_f3.1 .shard/$gfid_f4.1
#-------------------------------------------------------------------------------
#gfid split-brain won't show up in split-brain count.
diff --git a/tests/bugs/replicate/mdata-heal-no-xattrs.t b/tests/bugs/replicate/mdata-heal-no-xattrs.t
index d3b0c504c8..8d7da8c187 100644
--- a/tests/bugs/replicate/mdata-heal-no-xattrs.t
+++ b/tests/bugs/replicate/mdata-heal-no-xattrs.t
@@ -2,6 +2,7 @@
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
cleanup;
TEST glusterd
@@ -23,10 +24,7 @@ TEST [ $ret -eq 0 ]
TEST chmod +x $B0/$V0"0"/FILE
# Add gfid to xattrop
-xattrop_b0=$(afr_get_index_path $B0/$V0"0")
-base_entry_b0=`ls $xattrop_b0`
-gfid_str_FILE=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/FILE))
-TEST ln $xattrop_b0/$base_entry_b0 $xattrop_b0/$gfid_str_FILE
+TEST create_brick_xattrop_entry $B0/$V0"0" FILE
EXPECT_WITHIN $HEAL_TIMEOUT "^1$" get_pending_heal_count $V0
TEST $CLI volume set $V0 cluster.self-heal-daemon on
diff --git a/tests/features/index/index-link-count-lifecycle.t b/tests/features/index/index-link-count-lifecycle.t
new file mode 100644
index 0000000000..618a0f712d
--- /dev/null
+++ b/tests/features/index/index-link-count-lifecycle.t
@@ -0,0 +1,96 @@
+#!/bin/bash
+#Index link-count lifecycle tests
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0,1,2}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+TEST $CLI volume heal $V0 disable
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+#When the bricks are started link-count should be zero if no heals are needed
+EXPECT "^0$" get_value_from_brick_statedump $V0 $H0 $B0/brick0 "xattrop-pending-count"
+EXPECT "^0$" get_value_from_brick_statedump $V0 $H0 $B0/brick1 "xattrop-pending-count"
+EXPECT "^0$" get_value_from_brick_statedump $V0 $H0 $B0/brick2 "xattrop-pending-count"
+
+#No index file should be created when op succeeds on all bricks
+echo abc > $M0/abc
+TEST rm -f $M0/abc
+EXPECT "^0$" count_index_entries $B0/brick0
+EXPECT "^0$" count_index_entries $B0/brick1
+EXPECT "^0$" count_index_entries $B0/brick2
+#When heal is needed xattrop-pending-count should reflect number of files to be healed
+TEST kill_brick $V0 $H0 $B0/brick0
+echo abc > $M0/a
+TEST ls $M0 #Perform a lookup to make sure the values are updated
+EXPECT "^2$" get_value_from_brick_statedump $V0 $H0 $B0/brick1 "xattrop-pending-count"
+EXPECT "^2$" get_value_from_brick_statedump $V0 $H0 $B0/brick2 "xattrop-pending-count"
+
+#Once heals are completed pending count should be back to zero
+TEST $CLI volume heal $V0 enable
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+EXPECT "^0$" get_value_from_brick_statedump $V0 $H0 $B0/brick0 "xattrop-pending-count"
+EXPECT "^0$" get_value_from_brick_statedump $V0 $H0 $B0/brick1 "xattrop-pending-count"
+EXPECT "^0$" get_value_from_brick_statedump $V0 $H0 $B0/brick2 "xattrop-pending-count"
+
+cleanup;
+
+#Same scenario on an EC volume; EC doesn't fetch link-count, so the cached count is never refreshed
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 $H0:$B0/brick{0,1,2}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume start $V0
+TEST $CLI volume heal $V0 disable
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+#When the bricks are started link-count should be zero if no heals are needed
+EXPECT "^0$" get_value_from_brick_statedump $V0 $H0 $B0/brick0 "xattrop-pending-count"
+EXPECT "^0$" get_value_from_brick_statedump $V0 $H0 $B0/brick1 "xattrop-pending-count"
+EXPECT "^0$" get_value_from_brick_statedump $V0 $H0 $B0/brick2 "xattrop-pending-count"
+
+#When heal is needed the cached count is invalidated (-1) and, without a link-count request, stays that way
+TEST kill_brick $V0 $H0 $B0/brick0
+echo abc > $M0/a
+TEST ls $M0 #EC doesn't request link-count, so the values will stay '-1'
+EXPECT "^-1$" get_value_from_brick_statedump $V0 $H0 $B0/brick1 "xattrop-pending-count"
+EXPECT "^-1$" get_value_from_brick_statedump $V0 $H0 $B0/brick2 "xattrop-pending-count"
+
+#Once heals are completed pending count should be back to zero
+TEST $CLI volume heal $V0 enable
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count_shd $V0 0
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+EXPECT "^0$" get_value_from_brick_statedump $V0 $H0 $B0/brick0 "xattrop-pending-count"
+#pending-count is never requested on disperse so it will be stuck at -1(i.e. cache is invalidated) after heal completes
+EXPECT "^-1$" get_value_from_brick_statedump $V0 $H0 $B0/brick1 "xattrop-pending-count"
+EXPECT "^-1$" get_value_from_brick_statedump $V0 $H0 $B0/brick2 "xattrop-pending-count"
+cleanup;
+
+#Distribute (single brick) volume: only the value right after brick start is checked
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick0
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+#When the brick is started xattrop-pending-count in its statedump should be zero
+EXPECT "^0$" get_value_from_brick_statedump $V0 $H0 $B0/brick0 "xattrop-pending-count"
+
+cleanup;
diff --git a/tests/volume.rc b/tests/volume.rc
index e25e2312bf..afb296cb9e 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -838,6 +838,19 @@ function check_brick_multiplex() {
fi
}
+function get_value_from_brick_statedump {
+ #Print the value of the last "<key>=<value>" line matching $4 in a fresh
+ #statedump of brick $3 (volume $1, host $2); the dump file is removed after.
+ local vol="$1" host="$2" brick="$3" key="$4"
+ local statedump value
+
+ statedump="$(generate_brick_statedump $vol $host $brick)"
+ value="$(grep "$key" "$statedump" | cut -f2 -d'=' | tail -1)"
+
+ rm -f "$statedump"
+ echo "$value"
+}
+
function get_fd_count {
local vol=$1
local host=$2
diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c
index 68c18b4134..bebe47fdaa 100644
--- a/xlators/features/index/src/index.c
+++ b/xlators/features/index/src/index.c
@@ -11,6 +11,7 @@
#include <glusterfs/options.h>
#include "glusterfs3-xdr.h"
#include <glusterfs/syscall.h>
+#include <glusterfs/statedump.h>
#include <glusterfs/syncop.h>
#include <glusterfs/common-utils.h>
#include "index-messages.h"
@@ -422,15 +423,24 @@ index_get_link_count(index_priv_t *priv, int64_t *count,
}
static void
-index_dec_link_count(index_priv_t *priv, index_xattrop_type_t type)
+index_update_link_count_cache(index_priv_t *priv, index_xattrop_type_t type,
+ int link_count_delta)
{
switch (type) {
case XATTROP:
LOCK(&priv->lock);
{
- priv->pending_count--;
- if (priv->pending_count == 0)
- priv->pending_count--;
+ if (priv->pending_count >= 0) {
+ if (link_count_delta == -1) {
+ priv->pending_count--;
+ }
+ /*If this is the first xattrop, then pending_count needs to
+ * be updated for the next lstat/lookup with link-count
+ * xdata*/
+ if (priv->pending_count == 0) {
+ priv->pending_count--; /*Invalidate cache*/
+ }
+ }
}
UNLOCK(&priv->lock);
break;
@@ -664,6 +674,9 @@ index_add(xlator_t *this, uuid_t gfid, const char *subdir,
if (!ret)
goto out;
ret = index_link_to_base(this, gfid_path, subdir);
+ if (ret == 0) {
+ index_update_link_count_cache(priv, type, 1);
+ }
out:
return ret;
}
@@ -717,7 +730,10 @@ index_del(xlator_t *this, uuid_t gfid, const char *subdir, int type)
goto out;
}
- index_dec_link_count(priv, type);
+ /* If errno is ENOENT then ret won't be zero */
+ if (ret == 0) {
+ index_update_link_count_cache(priv, type, -1);
+ }
ret = 0;
out:
return ret;
@@ -777,7 +793,12 @@ index_fill_zero_array(dict_t *d, char *k, data_t *v, void *adata)
idx = index_find_xattr_type(d, k, v);
if (idx == -1)
return 0;
- zfilled[idx] = 0;
+
+ /* If an xattr value is all-zero leave zfilled[idx] as -1 so that xattrop
+ * index add/del won't happen */
+ if (!memeqzero((const char *)v->data, v->len)) {
+ zfilled[idx] = 0;
+ }
return 0;
}
@@ -797,7 +818,7 @@ _check_key_is_zero_filled(dict_t *d, char *k, data_t *v, void *tmp)
* zfilled[idx] will be 0(false) if value not zero.
* will be 1(true) if value is zero.
*/
- if (mem_0filled((const char *)v->data, v->len)) {
+ if (!memeqzero((const char *)v->data, v->len)) {
zfilled[idx] = 0;
return 0;
}
@@ -1284,21 +1305,21 @@ index_xattrop_do(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
else
x_cbk = index_xattrop64_cbk;
- // In wind phase bring the gfid into index. This way if the brick crashes
- // just after posix performs xattrop before _cbk reaches index xlator
- // we will still have the gfid in index.
+ /* In wind phase bring the gfid into index. This way if the brick crashes
+ * just after posix performs xattrop before _cbk reaches index xlator
+ * we will still have the gfid in index.
+ */
memset(zfilled, -1, sizeof(zfilled));
- /* Foreach xattr, set corresponding index of zfilled to 1
- * zfilled[index] = 1 implies the xattr's value is zero filled
- * and should be added in its corresponding subdir.
+ /* zfilled[index] = 0 implies the xattr's value is not zero filled
+ * and should be added in its corresponding index subdir.
*
- * zfilled should be set to 1 only for those index that
- * exist in xattr variable. This is to distinguish
+ * zfilled should be set to 0 only for those index that
+ * exist in xattr variable and xattr value non-zero. This is to distinguish
* between different types of volumes.
* For e.g., if the check is not made,
- * zfilled[DIRTY] is set to 1 for EC volumes,
- * index file will be tried to create in indices/dirty dir
+ * zfilled[DIRTY] is set to 0 for EC volumes,
+ * index file will be created in indices/dirty dir
* which doesn't exist for an EC volume.
*/
ret = dict_foreach(xattr, index_fill_zero_array, zfilled);
@@ -1961,7 +1982,7 @@ out:
return 0;
}
-int64_t
+static int64_t
index_fetch_link_count(xlator_t *this, index_xattrop_type_t type)
{
index_priv_t *priv = this->private;
@@ -2023,6 +2044,7 @@ index_fetch_link_count(xlator_t *this, index_xattrop_type_t type)
out:
if (dirp)
(void)sys_closedir(dirp);
+
return count;
}
@@ -2311,6 +2333,21 @@ out:
return ret;
}
+/* Statedump hook: writes this xlator's cached xattrop pending count. */
+static int
+index_priv_dump(xlator_t *this)
+{
+ index_priv_t *priv = this->private;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+
+ snprintf(key_prefix, sizeof(key_prefix), "%s.%s", this->type, this->name);
+ gf_proc_dump_add_section("%s", key_prefix);
+ gf_proc_dump_write("xattrop-pending-count", "%" PRId64,
+ priv->pending_count);
+
+ return 0;
+}
+
int32_t
mem_acct_init(xlator_t *this)
{
@@ -2641,7 +2678,9 @@ struct xlator_fops fops = {
.fstat = index_fstat,
};
-struct xlator_dumpops dumpops;
+struct xlator_dumpops dumpops = {
+ .priv = index_priv_dump, /* writes xattrop-pending-count to the statedump */
+};
struct xlator_cbks cbks = {.forget = index_forget,
.release = index_release,