diff options
author | Pranith Kumar Karampuri <pranith.karampuri@phonepe.com> | 2021-03-10 10:43:24 +0530 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-03-10 10:43:24 +0530 |
commit | 46949c4951eb1d2eb0a90c21db66c31e444bffe8 (patch) | |
tree | 7bb6efd5f605551a7872b462caef8530c05adc90 | |
parent | dc9bab7959b068617ef00f355c63bdca060b9605 (diff) | |
download | glusterfs-46949c4951eb1d2eb0a90c21db66c31e444bffe8.tar.gz glusterfs-46949c4951eb1d2eb0a90c21db66c31e444bffe8.tar.xz glusterfs-46949c4951eb1d2eb0a90c21db66c31e444bffe8.zip |
features/index: Optimize link-count fetching code path (#1789)
* features/index: Optimize link-count fetching code path
Problem:
AFR requests 'link-count' in lookup to check if there are any pending
heals. Based on this information, AFR sets dirent->inode to NULL in
readdirp while heals are ongoing, to prevent serving bad data. Once heals
are completed, however, the link-count xattr causes every lookup to do an
opendir of the xattrop directory and read its contents just to figure out
that no healing is needed. This was not detected until this
GitHub issue because ZFS can, in some cases, lead to very slow readdir()
calls. Since GlusterFS does a lot of lookups, this was slowing down
all operations and increasing load on the system.
Code problem:
On any xattrop operation, the index xlator adds an index to the relevant
dirs and, after the xattrop operation is done, deletes or keeps the index in
that directory based on the value fetched in xattrop from posix. AFR
sends all-zero xattrops for changelog xattrs. This leads to
priv->pending_count manipulation which sets the count back to -1. The next
lookup operation then triggers opendir/readdir to find the actual link-count,
because the in-memory priv->pending_count is negative.
Fix:
1) Don't add to index on all-zero xattrop for a key.
2) Set pending-count to -1 when the first gfid is added into xattrop
directory, so that the next lookup can compute the link-count.
fixes: #1764
Change-Id: I8a02c7e811a72c46d78ddb2d9d4fdc2222a444e9
Signed-off-by: Pranith Kumar K <pranith.karampuri@phonepe.com>
* addressed comments
Change-Id: Ide42bb1c1237b525d168bf1a9b82eb1bdc3bc283
Signed-off-by: Pranith Kumar K <pranith.karampuri@phonepe.com>
* tests: Handle base index absence
Change-Id: I3cf11a8644ccf23e01537228766f864b63c49556
Signed-off-by: Pranith Kumar K <pranith.karampuri@phonepe.com>
* Addressed LOCK based comments, .t comments
Change-Id: I5f53e40820cade3a44259c1ac1a7f3c5f2f0f310
Signed-off-by: Pranith Kumar K <pranith.karampuri@phonepe.com>
-rw-r--r-- | tests/afr.rc | 18 | ||||
-rw-r--r-- | tests/basic/afr/bug-1493415-gfid-heal-non-granular.t | 2 | ||||
-rw-r--r-- | tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t | 10 | ||||
-rw-r--r-- | tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t | 5 | ||||
-rw-r--r-- | tests/basic/afr/data-self-heal.t | 22 | ||||
-rw-r--r-- | tests/basic/afr/entry-self-heal-anon-dir-off.t | 8 | ||||
-rw-r--r-- | tests/basic/afr/entry-self-heal.t | 8 | ||||
-rw-r--r-- | tests/bugs/replicate/bug-1101647.t | 11 | ||||
-rw-r--r-- | tests/bugs/replicate/bug-1493415-gfid-heal.t | 5 | ||||
-rw-r--r-- | tests/bugs/replicate/bug-1626994-info-split-brain.t | 13 | ||||
-rw-r--r-- | tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t | 10 | ||||
-rw-r--r-- | tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t | 5 | ||||
-rw-r--r-- | tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t | 12 | ||||
-rw-r--r-- | tests/bugs/replicate/mdata-heal-no-xattrs.t | 6 | ||||
-rw-r--r-- | tests/features/index/index-link-count-lifecycle.t | 96 | ||||
-rw-r--r-- | tests/volume.rc | 13 | ||||
-rw-r--r-- | xlators/features/index/src/index.c | 77 |
17 files changed, 209 insertions, 112 deletions
diff --git a/tests/afr.rc b/tests/afr.rc index 241789903b..d73e37dae5 100644 --- a/tests/afr.rc +++ b/tests/afr.rc @@ -7,11 +7,27 @@ function create_brick_xattrop_entry { local params=`echo "$@" | cut -d' ' -f2-` echo $params + exit_val=0 for file in $params do gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $1/$file)) - ln $xattrop_dir/$base_entry $xattrop_dir/$gfid_str + if [ -z "$base_entry"]; + then + touch $xattrop_dir/$gfid_str + else + ln $xattrop_dir/$base_entry $xattrop_dir/$gfid_str + fi + + if [ $? -ne 0 ]; + then + exit_val=1 + fi done + + if [ $exit_val -eq 1 ]; + then + false + fi } function diff_dirs { diff --git a/tests/basic/afr/bug-1493415-gfid-heal-non-granular.t b/tests/basic/afr/bug-1493415-gfid-heal-non-granular.t index aff001cb70..586cf50481 100644 --- a/tests/basic/afr/bug-1493415-gfid-heal-non-granular.t +++ b/tests/basic/afr/bug-1493415-gfid-heal-non-granular.t @@ -56,7 +56,7 @@ TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2 #Now simulate setting of pending entry xattr on parent dir of 1st brick. TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/dir -create_brick_xattrop_entry $B0/${V0}0 dir +TEST create_brick_xattrop_entry $B0/${V0}0 dir # storage/posix considers that a file without gfid changed less than a second # before doesn't exist, so we need to wait for a second to force posix to diff --git a/tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t b/tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t index 9079c93039..de3e460d0e 100644 --- a/tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t +++ b/tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t @@ -33,10 +33,7 @@ setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/di setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir # Add entry to xattrop dir to trigger index heal. 
-xattrop_dir0=$(afr_get_index_path $B0/$V0"0") -base_entry_b0=`ls $xattrop_dir0` -gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/)) -ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str +TEST create_brick_xattrop_entry $B0/${V0}0 dir EXPECT "^1$" get_pending_heal_count $V0 # Remove the gfid xattr and the link file on one brick. @@ -79,10 +76,7 @@ setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/di setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir # Add entry to xattrop dir to trigger index heal. -xattrop_dir0=$(afr_get_index_path $B0/$V0"0") -base_entry_b0=`ls $xattrop_dir0` -gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/)) -ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str +TEST create_brick_xattrop_entry $B0/${V0}0 dir EXPECT "^1$" get_pending_heal_count $V0 # Remove the gfid xattr and the link file on two bricks. diff --git a/tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t b/tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t index 4f27da4ad3..decedd03db 100644 --- a/tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t +++ b/tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t @@ -64,10 +64,7 @@ setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/di setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir # Add entry to xattrop dir on first brick. 
-xattrop_dir0=$(afr_get_index_path $B0/$V0"0") -base_entry_b0=`ls $xattrop_dir0` -gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/)) -TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str +TEST create_brick_xattrop_entry $B0/$V0"0" dir EXPECT "^1$" get_pending_heal_count $V0 diff --git a/tests/basic/afr/data-self-heal.t b/tests/basic/afr/data-self-heal.t index 0f417b4a0b..390b414f52 100644 --- a/tests/basic/afr/data-self-heal.t +++ b/tests/basic/afr/data-self-heal.t @@ -7,18 +7,7 @@ cleanup; function create_xattrop_entry { - local xattrop_dir0=$(afr_get_index_path $B0/brick0) - local xattrop_dir1=$(afr_get_index_path $B0/brick1) - local base_entry_b0=`ls $xattrop_dir0` - local base_entry_b1=`ls $xattrop_dir1` - local gfid_str - - for file in "$@" - do - gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/brick0/$file)) - ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str - ln $xattrop_dir1/$base_entry_b1 $xattrop_dir1/$gfid_str - done + create_brick_xattrop_entry $B0/brick0 "$@" && create_brick_xattrop_entry $B0/brick1 "$@" || exit 1 } function is_heal_done { @@ -154,15 +143,8 @@ TEST "echo def > $B0/brick1/v1-dirty.txt" TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000200000000000000000 $B0/brick0/v1-dirty.txt TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000100000000000000000 $B0/brick1/v1-dirty.txt -#Create base entry in indices/xattrop -echo "Data" > $M0/FILE -rm -f $M0/FILE -EXPECT "1" count_index_entries $B0/brick0 -EXPECT "1" count_index_entries $B0/brick1 -cd - - #Create gfid hard links for all files before triggering index heals. 
-create_xattrop_entry pending-changelog biggest-file-source.txt biggest-file-more-prio-than-changelog.txt same-size-more-prio-to-changelog.txt size-and-witness-same.txt self-accusing-vs-source.txt self-accusing-both.txt self-accusing-vs-innocent.txt self-accusing-bigger-exists.txt size-more-prio-than-self-accused.txt v1-dirty.txt +TEST create_xattrop_entry pending-changelog biggest-file-source.txt biggest-file-more-prio-than-changelog.txt same-size-more-prio-to-changelog.txt size-and-witness-same.txt self-accusing-vs-source.txt self-accusing-both.txt self-accusing-vs-innocent.txt self-accusing-bigger-exists.txt size-more-prio-than-self-accused.txt v1-dirty.txt TEST $CLI volume heal $V0 EXPECT_WITHIN $HEAL_TIMEOUT "~" print_pending_heals pending-changelog biggest-file-source.txt biggest-file-more-prio-than-changelog.txt same-size-more-prio-to-changelog.txt size-and-witness-same.txt self-accusing-vs-source.txt self-accusing-both.txt self-accusing-vs-innocent.txt self-accusing-bigger-exists.txt size-more-prio-than-self-accused.txt v1-dirty.txt diff --git a/tests/basic/afr/entry-self-heal-anon-dir-off.t b/tests/basic/afr/entry-self-heal-anon-dir-off.t index 7bb6ee1419..c6caa4d028 100644 --- a/tests/basic/afr/entry-self-heal-anon-dir-off.t +++ b/tests/basic/afr/entry-self-heal-anon-dir-off.t @@ -316,16 +316,10 @@ $CLI volume start $V0 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 -#Create base entry in indices/xattrop -echo "Data" > $M0/FILE -rm -f $M0/FILE -EXPECT "1" count_index_entries $B0/${V0}0 -EXPECT "1" count_index_entries $B0/${V0}1 - TEST $CLI volume stop $V0; #Create entries for fool_heal and fool_me to ensure they are fully healed and dirty xattrs erased, before triggering index heal -create_brick_xattrop_entry $B0/${V0}0 fool_heal fool_me source_creations_heal/dir1 +TEST create_brick_xattrop_entry $B0/${V0}0 fool_heal fool_me source_creations_heal/dir1 $CLI volume start $V0 
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 diff --git a/tests/basic/afr/entry-self-heal.t b/tests/basic/afr/entry-self-heal.t index 0c1da7d211..6eadbf1d80 100644 --- a/tests/basic/afr/entry-self-heal.t +++ b/tests/basic/afr/entry-self-heal.t @@ -316,16 +316,10 @@ $CLI volume start $V0 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 -#Create base entry in indices/xattrop -echo "Data" > $M0/FILE -rm -f $M0/FILE -EXPECT "1" count_index_entries $B0/${V0}0 -EXPECT "1" count_index_entries $B0/${V0}1 - TEST $CLI volume stop $V0; #Create entries for fool_heal and fool_me to ensure they are fully healed and dirty xattrs erased, before triggering index heal -create_brick_xattrop_entry $B0/${V0}0 fool_heal fool_me source_creations_heal/dir1 +TEST create_brick_xattrop_entry $B0/${V0}0 fool_heal fool_me source_creations_heal/dir1 $CLI volume start $V0 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 diff --git a/tests/bugs/replicate/bug-1101647.t b/tests/bugs/replicate/bug-1101647.t index 708bc1a1e2..998794e086 100644 --- a/tests/bugs/replicate/bug-1101647.t +++ b/tests/bugs/replicate/bug-1101647.t @@ -11,17 +11,20 @@ TEST pidof glusterd TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2}; TEST $CLI volume start $V0; TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 +TEST kill_brick $V0 $H0 $B0/${V0}2 +#Create base entry in indices/xattrop +echo "Data">$M0/file +EXPECT 3 count_index_entries $B0/$V0"1" +TEST $CLI volume start $V0 force EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 -#Create base entry in indices/xattrop -echo "Data">$M0/file -TEST $CLI volume heal $V0 #Entries 
from indices/xattrop should not be cleared after a heal. EXPECT 1 count_index_entries $B0/$V0"1" -EXPECT 1 count_index_entries $B0/$V0"2" TEST kill_brick $V0 $H0 $B0/${V0}2 echo "More data">>$M0/file diff --git a/tests/bugs/replicate/bug-1493415-gfid-heal.t b/tests/bugs/replicate/bug-1493415-gfid-heal.t index eceb289104..20ce4ee1dd 100644 --- a/tests/bugs/replicate/bug-1493415-gfid-heal.t +++ b/tests/bugs/replicate/bug-1493415-gfid-heal.t @@ -14,9 +14,6 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 TEST $CLI volume set $V0 self-heal-daemon off -# Create base entry in indices/xattrop -echo "Data" > $M0/FILE - #------------------------------------------------------------------------------# TEST touch $M0/f1 gfid_f1=$(gf_get_gfid_xattr $B0/${V0}0/f1) @@ -55,7 +52,7 @@ TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2 #Now simulate setting of pending entry xattr on parent dir of 1st brick. 
TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000001 $B0/${V0}0/dir -create_brick_xattrop_entry $B0/${V0}0 dir +TEST create_brick_xattrop_entry $B0/${V0}0 dir # storage/posix considers that a file without gfid changed less than a second # before doesn't exist, so we need to wait for a second to force posix to diff --git a/tests/bugs/replicate/bug-1626994-info-split-brain.t b/tests/bugs/replicate/bug-1626994-info-split-brain.t index 86bfecb1a9..79a29e6f47 100644 --- a/tests/bugs/replicate/bug-1626994-info-split-brain.t +++ b/tests/bugs/replicate/bug-1626994-info-split-brain.t @@ -22,13 +22,6 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 -# Create base entry in indices/xattrop -echo "Data" > $M0/FILE -rm -f $M0/FILE -EXPECT "1" count_index_entries $B0/${V0}0 -EXPECT "1" count_index_entries $B0/${V0}1 -EXPECT "1" count_index_entries $B0/${V0}2 - TEST mkdir $M0/dirty_dir TEST mkdir $M0/pending_dir @@ -37,7 +30,7 @@ TEST mkdir $M0/pending_dir TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}0/dirty_dir TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/dirty_dir TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}2/dirty_dir -create_brick_xattrop_entry $B0/${V0}0 dirty_dir +TEST create_brick_xattrop_entry $B0/${V0}0 dirty_dir # Should not show up as split-brain. EXPECT "0" afr_get_split_brain_count $V0 @@ -46,7 +39,7 @@ EXPECT "0" afr_get_split_brain_count $V0 TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/${V0}0 TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/${V0}1 TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}2 -create_brick_xattrop_entry $B0/${V0}0 "/" +TEST create_brick_xattrop_entry $B0/${V0}0 "/" # Should not show up as split-brain. 
EXPECT "0" afr_get_split_brain_count $V0 @@ -55,7 +48,7 @@ EXPECT "0" afr_get_split_brain_count $V0 TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/pending_dir TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/${V0}1/pending_dir TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}2/pending_dir -create_brick_xattrop_entry $B0/${V0}0 pending_dir +TEST create_brick_xattrop_entry $B0/${V0}0 pending_dir # Should show up as split-brain. EXPECT "1" afr_get_split_brain_count $V0 diff --git a/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t index 1fdf7ea2da..0bd2b48cde 100644 --- a/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t +++ b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t @@ -34,10 +34,7 @@ setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000001 $B0/$V0"0"/di setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000001 $B0/$V0"0"/dir # Add entry to xattrop dir to trigger index heal. -xattrop_dir0=$(afr_get_index_path $B0/$V0"0") -base_entry_b0=`ls $xattrop_dir0` -gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/)) -ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str +TEST create_brick_xattrop_entry $B0/$V0"0" dir EXPECT "^1$" get_pending_heal_count $V0 # Remove the gfid xattr and the link file on one brick. @@ -82,10 +79,7 @@ setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000001 $B0/$V0"0"/di setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000001 $B0/$V0"0"/dir # Add entry to xattrop dir to trigger index heal. 
-xattrop_dir0=$(afr_get_index_path $B0/$V0"0") -base_entry_b0=`ls $xattrop_dir0` -gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/)) -ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str +TEST create_brick_xattrop_entry $B0/$V0"0" dir EXPECT "^1$" get_pending_heal_count $V0 # Remove the gfid xattr and the link file on two bricks. diff --git a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t index 3da873a922..48e46e117e 100644 --- a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t +++ b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t @@ -66,10 +66,7 @@ setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000001 $B0/$V0"2"/di setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000001 $B0/$V0"0"/dir # Add entry to xattrop dir on first brick. -xattrop_dir0=$(afr_get_index_path $B0/$V0"0") -base_entry_b0=`ls $xattrop_dir0` -gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/)) -TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str +TEST create_brick_xattrop_entry $B0/$V0"0" dir EXPECT "^1$" get_pending_heal_count $V0 diff --git a/tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t b/tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t index c1bdf34ee6..52cb5214cb 100644 --- a/tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t +++ b/tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t @@ -79,21 +79,11 @@ TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000100000000 $B0/${V0 #------------------------------------------------------------------------------- #Add entry to xattrop dir on first brick and check for split-brain. 
-xattrop_dir0=$(afr_get_index_path $B0/$V0"0") -base_entry_b0=`ls $xattrop_dir0` - -gfid_f1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/file1)) -TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f1 gfid_f2_shard1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/.shard/$gfid_f2.1)) -TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f2_shard1 gfid_f3=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/file3)) -gfid_f3_shard1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/.shard/$gfid_f3.1)) -TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f3_shard1 - -gfid_f4_shard1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/.shard/$gfid_f4.1)) -TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f4_shard1 +TEST create_brick_xattrop_entry $B0/$V0"0" file1 .shard/$gfid_f2.1 .shard/$gfid_f3.1 .shard/$gfid_f4.1 #------------------------------------------------------------------------------- #gfid split-brain won't show up in split-brain count. diff --git a/tests/bugs/replicate/mdata-heal-no-xattrs.t b/tests/bugs/replicate/mdata-heal-no-xattrs.t index d3b0c504c8..8d7da8c187 100644 --- a/tests/bugs/replicate/mdata-heal-no-xattrs.t +++ b/tests/bugs/replicate/mdata-heal-no-xattrs.t @@ -2,6 +2,7 @@ . $(dirname $0)/../../include.rc . $(dirname $0)/../../volume.rc +. 
$(dirname $0)/../../afr.rc cleanup; TEST glusterd @@ -23,10 +24,7 @@ TEST [ $ret -eq 0 ] TEST chmod +x $B0/$V0"0"/FILE # Add gfid to xattrop -xattrop_b0=$(afr_get_index_path $B0/$V0"0") -base_entry_b0=`ls $xattrop_b0` -gfid_str_FILE=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/FILE)) -TEST ln $xattrop_b0/$base_entry_b0 $xattrop_b0/$gfid_str_FILE +TEST create_brick_xattrop_entry $B0/$V0"0" FILE EXPECT_WITHIN $HEAL_TIMEOUT "^1$" get_pending_heal_count $V0 TEST $CLI volume set $V0 cluster.self-heal-daemon on diff --git a/tests/features/index/index-link-count-lifecycle.t b/tests/features/index/index-link-count-lifecycle.t new file mode 100644 index 0000000000..618a0f712d --- /dev/null +++ b/tests/features/index/index-link-count-lifecycle.t @@ -0,0 +1,96 @@ +#!/bin/bash +#Index link-count lifecycle tests + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0,1,2} +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume set $V0 performance.flush-behind off +TEST $CLI volume start $V0 +TEST $CLI volume heal $V0 disable +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; + +#When the bricks are started link-count should be zero if no heals are needed +EXPECT "^0$" get_value_from_brick_statedump $V0 $H0 $B0/brick0 "xattrop-pending-count" +EXPECT "^0$" get_value_from_brick_statedump $V0 $H0 $B0/brick1 "xattrop-pending-count" +EXPECT "^0$" get_value_from_brick_statedump $V0 $H0 $B0/brick2 "xattrop-pending-count" + +#No index file should be created when op succeeds on all bricks +echo abc > $M0/abc +TEST rm -f $M0/abc +EXPECT "^0$" count_index_entries $B0/brick0 +EXPECT "^0$" count_index_entries $B0/brick1 +EXPECT "^0$" count_index_entries $B0/brick2 +#When heal is needed xattrop-pending-count should reflect number of files to be healed +TEST kill_brick $V0 $H0 $B0/brick0 +echo abc > $M0/a +TEST ls $M0 
#Perform a lookup to make sure the values are updated +EXPECT "^2$" get_value_from_brick_statedump $V0 $H0 $B0/brick1 "xattrop-pending-count" +EXPECT "^2$" get_value_from_brick_statedump $V0 $H0 $B0/brick2 "xattrop-pending-count" + +#Once heals are completed pending count should be back to zero +TEST $CLI volume heal $V0 enable +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0 +EXPECT "^0$" get_value_from_brick_statedump $V0 $H0 $B0/brick0 "xattrop-pending-count" +EXPECT "^0$" get_value_from_brick_statedump $V0 $H0 $B0/brick1 "xattrop-pending-count" +EXPECT "^0$" get_value_from_brick_statedump $V0 $H0 $B0/brick2 "xattrop-pending-count" + +cleanup; + +#Same tests for EC volume, EC doesn't fetch link-count, so it is not refreshed +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 disperse 3 $H0:$B0/brick{0,1,2} +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume start $V0 +TEST $CLI volume heal $V0 disable +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; + +#When the bricks are started link-count should be zero if no heals are needed +EXPECT "^0$" get_value_from_brick_statedump $V0 $H0 $B0/brick0 "xattrop-pending-count" +EXPECT "^0$" get_value_from_brick_statedump $V0 $H0 $B0/brick1 "xattrop-pending-count" +EXPECT "^0$" get_value_from_brick_statedump $V0 $H0 $B0/brick2 "xattrop-pending-count" + +#When heal is needed xattrop-pending-count should reflect number of files to be healed +TEST kill_brick $V0 $H0 $B0/brick0 +echo abc > $M0/a +TEST ls $M0 #EC doesn't request link-count, so the values will stay '-1' +EXPECT "^-1$" get_value_from_brick_statedump $V0 $H0 $B0/brick1 
"xattrop-pending-count" +EXPECT "^-1$" get_value_from_brick_statedump $V0 $H0 $B0/brick2 "xattrop-pending-count" + +#Once heals are completed pending count should be back to zero +TEST $CLI volume heal $V0 enable +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count_shd $V0 0 +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0 +EXPECT "^0$" get_value_from_brick_statedump $V0 $H0 $B0/brick0 "xattrop-pending-count" +#pending-count is never requested on disperse so it will be stuck at -1(i.e. cache is invalidated) after heal completes +EXPECT "^-1$" get_value_from_brick_statedump $V0 $H0 $B0/brick1 "xattrop-pending-count" +EXPECT "^-1$" get_value_from_brick_statedump $V0 $H0 $B0/brick2 "xattrop-pending-count" +cleanup; + +#Same tests for distribute volume +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 $H0:$B0/brick0 +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume start $V0 +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; + +#When the brick is started link-count should be zero +EXPECT "^0$" get_value_from_brick_statedump $V0 $H0 $B0/brick0 "xattrop-pending-count" + +cleanup; diff --git a/tests/volume.rc b/tests/volume.rc index e25e2312bf..afb296cb9e 100644 --- a/tests/volume.rc +++ b/tests/volume.rc @@ -838,6 +838,19 @@ function check_brick_multiplex() { fi } +function get_value_from_brick_statedump { + local vol="$1" + local host="$2" + local brick="$3" + local key="$4" + + local statedump="$(generate_brick_statedump $vol $host $brick)" + value="$(grep "$key" $statedump | cut -f2 -d'=' | tail -1)" + + rm -f "$statedump" + echo "$value" +} + function get_fd_count { local vol=$1 local host=$2 diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c index 68c18b4134..bebe47fdaa 100644 --- a/xlators/features/index/src/index.c +++ b/xlators/features/index/src/index.c @@ 
-11,6 +11,7 @@ #include <glusterfs/options.h> #include "glusterfs3-xdr.h" #include <glusterfs/syscall.h> +#include <glusterfs/statedump.h> #include <glusterfs/syncop.h> #include <glusterfs/common-utils.h> #include "index-messages.h" @@ -422,15 +423,24 @@ index_get_link_count(index_priv_t *priv, int64_t *count, } static void -index_dec_link_count(index_priv_t *priv, index_xattrop_type_t type) +index_update_link_count_cache(index_priv_t *priv, index_xattrop_type_t type, + int link_count_delta) { switch (type) { case XATTROP: LOCK(&priv->lock); { - priv->pending_count--; - if (priv->pending_count == 0) - priv->pending_count--; + if (priv->pending_count >= 0) { + if (link_count_delta == -1) { + priv->pending_count--; + } + /*If this is the first xattrop, then pending_count needs to + * be updated for the next lstat/lookup with link-count + * xdata*/ + if (priv->pending_count == 0) { + priv->pending_count--; /*Invalidate cache*/ + } + } } UNLOCK(&priv->lock); break; @@ -664,6 +674,9 @@ index_add(xlator_t *this, uuid_t gfid, const char *subdir, if (!ret) goto out; ret = index_link_to_base(this, gfid_path, subdir); + if (ret == 0) { + index_update_link_count_cache(priv, type, 1); + } out: return ret; } @@ -717,7 +730,10 @@ index_del(xlator_t *this, uuid_t gfid, const char *subdir, int type) goto out; } - index_dec_link_count(priv, type); + /* If errno is ENOENT then ret won't be zero */ + if (ret == 0) { + index_update_link_count_cache(priv, type, -1); + } ret = 0; out: return ret; @@ -777,7 +793,12 @@ index_fill_zero_array(dict_t *d, char *k, data_t *v, void *adata) idx = index_find_xattr_type(d, k, v); if (idx == -1) return 0; - zfilled[idx] = 0; + + /* If an xattr value is all-zero leave zfilled[idx] as -1 so that xattrop + * index add/del won't happen */ + if (!memeqzero((const char *)v->data, v->len)) { + zfilled[idx] = 0; + } return 0; } @@ -797,7 +818,7 @@ _check_key_is_zero_filled(dict_t *d, char *k, data_t *v, void *tmp) * zfilled[idx] will be 0(false) if value 
not zero. * will be 1(true) if value is zero. */ - if (mem_0filled((const char *)v->data, v->len)) { + if (!memeqzero((const char *)v->data, v->len)) { zfilled[idx] = 0; return 0; } @@ -1284,21 +1305,21 @@ index_xattrop_do(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, else x_cbk = index_xattrop64_cbk; - // In wind phase bring the gfid into index. This way if the brick crashes - // just after posix performs xattrop before _cbk reaches index xlator - // we will still have the gfid in index. + /* In wind phase bring the gfid into index. This way if the brick crashes + * just after posix performs xattrop before _cbk reaches index xlator + * we will still have the gfid in index. + */ memset(zfilled, -1, sizeof(zfilled)); - /* Foreach xattr, set corresponding index of zfilled to 1 - * zfilled[index] = 1 implies the xattr's value is zero filled - * and should be added in its corresponding subdir. + /* zfilled[index] = 0 implies the xattr's value is not zero filled + * and should be added in its corresponding index subdir. * - * zfilled should be set to 1 only for those index that - * exist in xattr variable. This is to distinguish + * zfilled should be set to 0 only for those index that + * exist in xattr variable and xattr value non-zero. This is to distinguish * between different types of volumes. * For e.g., if the check is not made, - * zfilled[DIRTY] is set to 1 for EC volumes, - * index file will be tried to create in indices/dirty dir + * zfilled[DIRTY] is set to 0 for EC volumes, + * index file will be created in indices/dirty dir * which doesn't exist for an EC volume. 
*/ ret = dict_foreach(xattr, index_fill_zero_array, zfilled); @@ -1961,7 +1982,7 @@ out: return 0; } -int64_t +static int64_t index_fetch_link_count(xlator_t *this, index_xattrop_type_t type) { index_priv_t *priv = this->private; @@ -2023,6 +2044,7 @@ index_fetch_link_count(xlator_t *this, index_xattrop_type_t type) out: if (dirp) (void)sys_closedir(dirp); + return count; } @@ -2311,6 +2333,21 @@ out: return ret; } +static int +index_priv_dump(xlator_t *this) +{ + index_priv_t *priv = NULL; + char key_prefix[GF_DUMP_MAX_BUF_LEN]; + + priv = this->private; + + snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name); + gf_proc_dump_add_section("%s", key_prefix); + gf_proc_dump_write("xattrop-pending-count", "%"PRId64, priv->pending_count); + + return 0; +} + int32_t mem_acct_init(xlator_t *this) { @@ -2641,7 +2678,9 @@ struct xlator_fops fops = { .fstat = index_fstat, }; -struct xlator_dumpops dumpops; +struct xlator_dumpops dumpops = { + .priv = index_priv_dump, +}; struct xlator_cbks cbks = {.forget = index_forget, .release = index_release, |