summaryrefslogtreecommitdiffstats
path: root/xlators
diff options
context:
space:
mode:
authorPranith Kumar Karampuri <pranith.karampuri@phonepe.com>2021-03-10 10:43:24 +0530
committerGitHub <noreply@github.com>2021-03-10 10:43:24 +0530
commit46949c4951eb1d2eb0a90c21db66c31e444bffe8 (patch)
tree7bb6efd5f605551a7872b462caef8530c05adc90 /xlators
parentdc9bab7959b068617ef00f355c63bdca060b9605 (diff)
downloadglusterfs-46949c4951eb1d2eb0a90c21db66c31e444bffe8.tar.gz
glusterfs-46949c4951eb1d2eb0a90c21db66c31e444bffe8.tar.xz
glusterfs-46949c4951eb1d2eb0a90c21db66c31e444bffe8.zip
features/index: Optimize link-count fetching code path (#1789)
* features/index: Optimize link-count fetching code path Problem: AFR requests 'link-count' in lookup to check if there are any pending heals. Based on this information, afr will set dirent->inode to NULL in readdirp when heals are ongoing to prevent serving bad data. When heals are completed, link-count xattr is leading to doing an opendir of xattrop directory and then reading the contents to figure out that there is no healing needed for every lookup. This was not detected until this github issue because ZFS in some cases can lead to very slow readdir() calls. Since Glusterfs does lot of lookups, this was slowing down all operations increasing load on the system. Code problem: index xlator on any xattrop operation adds index to the relevant dirs and after the xattrop operation is done, will delete/keep the index in that directory based on the value fetched in xattrop from posix. AFR sends all-zero xattrop for changelog xattrs. This is leading to priv->pending_count manipulation which sets the count back to -1. Next Lookup operation triggers opendir/readdir to find the actual link-count in lookup because in memory priv->pending_count is -ve. Fix: 1) Don't add to index on all-zero xattrop for a key. 2) Set pending-count to -1 when the first gfid is added into xattrop directory, so that the next lookup can compute the link-count. fixes: #1764 Change-Id: I8a02c7e811a72c46d78ddb2d9d4fdc2222a444e9 Signed-off-by: Pranith Kumar K <pranith.karampuri@phonepe.com> * addressed comments Change-Id: Ide42bb1c1237b525d168bf1a9b82eb1bdc3bc283 Signed-off-by: Pranith Kumar K <pranith.karampuri@phonepe.com> * tests: Handle base index absence Change-Id: I3cf11a8644ccf23e01537228766f864b63c49556 Signed-off-by: Pranith Kumar K <pranith.karampuri@phonepe.com> * Addressed LOCK based comments, .t comments Change-Id: I5f53e40820cade3a44259c1ac1a7f3c5f2f0f310 Signed-off-by: Pranith Kumar K <pranith.karampuri@phonepe.com>
Diffstat (limited to 'xlators')
-rw-r--r--xlators/features/index/src/index.c77
1 files changed, 58 insertions, 19 deletions
diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c
index 68c18b4134..bebe47fdaa 100644
--- a/xlators/features/index/src/index.c
+++ b/xlators/features/index/src/index.c
@@ -11,6 +11,7 @@
#include <glusterfs/options.h>
#include "glusterfs3-xdr.h"
#include <glusterfs/syscall.h>
+#include <glusterfs/statedump.h>
#include <glusterfs/syncop.h>
#include <glusterfs/common-utils.h>
#include "index-messages.h"
@@ -422,15 +423,24 @@ index_get_link_count(index_priv_t *priv, int64_t *count,
}
static void
-index_dec_link_count(index_priv_t *priv, index_xattrop_type_t type)
+index_update_link_count_cache(index_priv_t *priv, index_xattrop_type_t type,
+ int link_count_delta)
{
switch (type) {
case XATTROP:
LOCK(&priv->lock);
{
- priv->pending_count--;
- if (priv->pending_count == 0)
- priv->pending_count--;
+ if (priv->pending_count >= 0) {
+ if (link_count_delta == -1) {
+ priv->pending_count--;
+ }
+ /*If this is the first xattrop, then pending_count needs to
+ * be updated for the next lstat/lookup with link-count
+ * xdata*/
+ if (priv->pending_count == 0) {
+ priv->pending_count--; /*Invalidate cache*/
+ }
+ }
}
UNLOCK(&priv->lock);
break;
@@ -664,6 +674,9 @@ index_add(xlator_t *this, uuid_t gfid, const char *subdir,
if (!ret)
goto out;
ret = index_link_to_base(this, gfid_path, subdir);
+ if (ret == 0) {
+ index_update_link_count_cache(priv, type, 1);
+ }
out:
return ret;
}
@@ -717,7 +730,10 @@ index_del(xlator_t *this, uuid_t gfid, const char *subdir, int type)
goto out;
}
- index_dec_link_count(priv, type);
+ /* If errno is ENOENT then ret won't be zero */
+ if (ret == 0) {
+ index_update_link_count_cache(priv, type, -1);
+ }
ret = 0;
out:
return ret;
@@ -777,7 +793,12 @@ index_fill_zero_array(dict_t *d, char *k, data_t *v, void *adata)
idx = index_find_xattr_type(d, k, v);
if (idx == -1)
return 0;
- zfilled[idx] = 0;
+
+ /* If an xattr value is all-zero leave zfilled[idx] as -1 so that xattrop
+ * index add/del won't happen */
+ if (!memeqzero((const char *)v->data, v->len)) {
+ zfilled[idx] = 0;
+ }
return 0;
}
@@ -797,7 +818,7 @@ _check_key_is_zero_filled(dict_t *d, char *k, data_t *v, void *tmp)
* zfilled[idx] will be 0(false) if value not zero.
* will be 1(true) if value is zero.
*/
- if (mem_0filled((const char *)v->data, v->len)) {
+ if (!memeqzero((const char *)v->data, v->len)) {
zfilled[idx] = 0;
return 0;
}
@@ -1284,21 +1305,21 @@ index_xattrop_do(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
else
x_cbk = index_xattrop64_cbk;
- // In wind phase bring the gfid into index. This way if the brick crashes
- // just after posix performs xattrop before _cbk reaches index xlator
- // we will still have the gfid in index.
+ /* In wind phase bring the gfid into index. This way if the brick crashes
+ * just after posix performs xattrop before _cbk reaches index xlator
+ * we will still have the gfid in index.
+ */
memset(zfilled, -1, sizeof(zfilled));
- /* Foreach xattr, set corresponding index of zfilled to 1
- * zfilled[index] = 1 implies the xattr's value is zero filled
- * and should be added in its corresponding subdir.
+ /* zfilled[index] = 0 implies the xattr's value is not zero filled
+ * and should be added in its corresponding index subdir.
*
- * zfilled should be set to 1 only for those index that
- * exist in xattr variable. This is to distinguish
+ * zfilled should be set to 0 only for those index that
+ * exist in xattr variable and xattr value non-zero. This is to distinguish
* between different types of volumes.
* For e.g., if the check is not made,
- * zfilled[DIRTY] is set to 1 for EC volumes,
- * index file will be tried to create in indices/dirty dir
+ * zfilled[DIRTY] is set to 0 for EC volumes,
+ * index file will be created in indices/dirty dir
* which doesn't exist for an EC volume.
*/
ret = dict_foreach(xattr, index_fill_zero_array, zfilled);
@@ -1961,7 +1982,7 @@ out:
return 0;
}
-int64_t
+static int64_t
index_fetch_link_count(xlator_t *this, index_xattrop_type_t type)
{
index_priv_t *priv = this->private;
@@ -2023,6 +2044,7 @@ index_fetch_link_count(xlator_t *this, index_xattrop_type_t type)
out:
if (dirp)
(void)sys_closedir(dirp);
+
return count;
}
@@ -2311,6 +2333,21 @@ out:
return ret;
}
+static int
+index_priv_dump(xlator_t *this)
+{
+ index_priv_t *priv = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+
+ priv = this->private;
+
+ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
+ gf_proc_dump_add_section("%s", key_prefix);
+ gf_proc_dump_write("xattrop-pending-count", "%"PRId64, priv->pending_count);
+
+ return 0;
+}
+
int32_t
mem_acct_init(xlator_t *this)
{
@@ -2641,7 +2678,9 @@ struct xlator_fops fops = {
.fstat = index_fstat,
};
-struct xlator_dumpops dumpops;
+struct xlator_dumpops dumpops = {
+ .priv = index_priv_dump,
+};
struct xlator_cbks cbks = {.forget = index_forget,
.release = index_release,