diff options
author | Pranith Kumar Karampuri <pranith.karampuri@phonepe.com> | 2021-03-10 10:43:24 +0530 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-03-10 10:43:24 +0530 |
commit | 46949c4951eb1d2eb0a90c21db66c31e444bffe8 (patch) | |
tree | 7bb6efd5f605551a7872b462caef8530c05adc90 /xlators | |
parent | dc9bab7959b068617ef00f355c63bdca060b9605 (diff) | |
download | glusterfs-46949c4951eb1d2eb0a90c21db66c31e444bffe8.tar.gz glusterfs-46949c4951eb1d2eb0a90c21db66c31e444bffe8.tar.xz glusterfs-46949c4951eb1d2eb0a90c21db66c31e444bffe8.zip |
features/index: Optimize link-count fetching code path (#1789)
* features/index: Optimize link-count fetching code path
Problem:
AFR requests 'link-count' in lookup to check if there are any pending
heals. Based on this information, afr will set dirent->inode to NULL in
readdirp when heals are ongoing to prevent serving bad data. When heals
are completed, link-count xattr is leading to doing an opendir of
xattrop directory and then reading the contents to figure out that there
is no healing needed for every lookup. This was not detected until this
github issue because ZFS in some cases can lead to very slow readdir()
calls. Since Glusterfs does lot of lookups, this was slowing down
all operations increasing load on the system.
Code problem:
index xlator on any xattrop operation adds index to the relevant dirs
and after the xattrop operation is done, will delete/keep the index in
that directory based on the value fetched in xattrop from posix. AFR
sends all-zero xattrop for changelog xattrs. This is leading to
priv->pending_count manipulation which sets the count back to -1. Next
Lookup operation triggers opendir/readdir to find the actual link-count in
lookup because in memory priv->pending_count is -ve.
Fix:
1) Don't add to index on all-zero xattrop for a key.
2) Set pending-count to -1 when the first gfid is added into xattrop
directory, so that the next lookup can compute the link-count.
fixes: #1764
Change-Id: I8a02c7e811a72c46d78ddb2d9d4fdc2222a444e9
Signed-off-by: Pranith Kumar K <pranith.karampuri@phonepe.com>
* addressed comments
Change-Id: Ide42bb1c1237b525d168bf1a9b82eb1bdc3bc283
Signed-off-by: Pranith Kumar K <pranith.karampuri@phonepe.com>
* tests: Handle base index absence
Change-Id: I3cf11a8644ccf23e01537228766f864b63c49556
Signed-off-by: Pranith Kumar K <pranith.karampuri@phonepe.com>
* Addressed LOCK based comments, .t comments
Change-Id: I5f53e40820cade3a44259c1ac1a7f3c5f2f0f310
Signed-off-by: Pranith Kumar K <pranith.karampuri@phonepe.com>
Diffstat (limited to 'xlators')
-rw-r--r-- | xlators/features/index/src/index.c | 77 |
1 files changed, 58 insertions, 19 deletions
diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c index 68c18b4134..bebe47fdaa 100644 --- a/xlators/features/index/src/index.c +++ b/xlators/features/index/src/index.c @@ -11,6 +11,7 @@ #include <glusterfs/options.h> #include "glusterfs3-xdr.h" #include <glusterfs/syscall.h> +#include <glusterfs/statedump.h> #include <glusterfs/syncop.h> #include <glusterfs/common-utils.h> #include "index-messages.h" @@ -422,15 +423,24 @@ index_get_link_count(index_priv_t *priv, int64_t *count, } static void -index_dec_link_count(index_priv_t *priv, index_xattrop_type_t type) +index_update_link_count_cache(index_priv_t *priv, index_xattrop_type_t type, + int link_count_delta) { switch (type) { case XATTROP: LOCK(&priv->lock); { - priv->pending_count--; - if (priv->pending_count == 0) - priv->pending_count--; + if (priv->pending_count >= 0) { + if (link_count_delta == -1) { + priv->pending_count--; + } + /*If this is the first xattrop, then pending_count needs to + * be updated for the next lstat/lookup with link-count + * xdata*/ + if (priv->pending_count == 0) { + priv->pending_count--; /*Invalidate cache*/ + } + } } UNLOCK(&priv->lock); break; @@ -664,6 +674,9 @@ index_add(xlator_t *this, uuid_t gfid, const char *subdir, if (!ret) goto out; ret = index_link_to_base(this, gfid_path, subdir); + if (ret == 0) { + index_update_link_count_cache(priv, type, 1); + } out: return ret; } @@ -717,7 +730,10 @@ index_del(xlator_t *this, uuid_t gfid, const char *subdir, int type) goto out; } - index_dec_link_count(priv, type); + /* If errno is ENOENT then ret won't be zero */ + if (ret == 0) { + index_update_link_count_cache(priv, type, -1); + } ret = 0; out: return ret; @@ -777,7 +793,12 @@ index_fill_zero_array(dict_t *d, char *k, data_t *v, void *adata) idx = index_find_xattr_type(d, k, v); if (idx == -1) return 0; - zfilled[idx] = 0; + + /* If an xattr value is all-zero leave zfilled[idx] as -1 so that xattrop + * index add/del won't happen */ + if (!memeqzero((const char *)v->data, v->len)) { + zfilled[idx] = 0; + } return 0; } @@ -797,7 +818,7 @@ _check_key_is_zero_filled(dict_t *d, char *k, data_t *v, void *tmp) * zfilled[idx] will be 0(false) if value not zero. * will be 1(true) if value is zero. */ - if (mem_0filled((const char *)v->data, v->len)) { + if (!memeqzero((const char *)v->data, v->len)) { zfilled[idx] = 0; return 0; } @@ -1284,21 +1305,21 @@ index_xattrop_do(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, else x_cbk = index_xattrop64_cbk; - // In wind phase bring the gfid into index. This way if the brick crashes - // just after posix performs xattrop before _cbk reaches index xlator - // we will still have the gfid in index. + /* In wind phase bring the gfid into index. This way if the brick crashes + * just after posix performs xattrop before _cbk reaches index xlator + * we will still have the gfid in index. + */ memset(zfilled, -1, sizeof(zfilled)); - /* Foreach xattr, set corresponding index of zfilled to 1 - * zfilled[index] = 1 implies the xattr's value is zero filled - * and should be added in its corresponding subdir. + /* zfilled[index] = 0 implies the xattr's value is not zero filled + * and should be added in its corresponding index subdir. * - * zfilled should be set to 1 only for those index that - * exist in xattr variable. This is to distinguish + * zfilled should be set to 0 only for those index that + * exist in xattr variable and xattr value non-zero. This is to distinguish * between different types of volumes. * For e.g., if the check is not made, - * zfilled[DIRTY] is set to 1 for EC volumes, - * index file will be tried to create in indices/dirty dir + * zfilled[DIRTY] is set to 0 for EC volumes, + * index file will be created in indices/dirty dir * which doesn't exist for an EC volume. */ ret = dict_foreach(xattr, index_fill_zero_array, zfilled); @@ -1961,7 +1982,7 @@ out: return 0; } -int64_t +static int64_t index_fetch_link_count(xlator_t *this, index_xattrop_type_t type) { index_priv_t *priv = this->private; @@ -2023,6 +2044,7 @@ index_fetch_link_count(xlator_t *this, index_xattrop_type_t type) out: if (dirp) (void)sys_closedir(dirp); + return count; } @@ -2311,6 +2333,21 @@ out: return ret; } +static int +index_priv_dump(xlator_t *this) +{ + index_priv_t *priv = NULL; + char key_prefix[GF_DUMP_MAX_BUF_LEN]; + + priv = this->private; + + snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name); + gf_proc_dump_add_section("%s", key_prefix); + gf_proc_dump_write("xattrop-pending-count", "%"PRId64, priv->pending_count); + + return 0; +} + int32_t mem_acct_init(xlator_t *this) { @@ -2641,7 +2678,9 @@ struct xlator_fops fops = { .fstat = index_fstat, }; -struct xlator_dumpops dumpops; +struct xlator_dumpops dumpops = { + .priv = index_priv_dump, +}; struct xlator_cbks cbks = {.forget = index_forget, .release = index_release, |