summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRavishankar N <ravishankar@redhat.com>2021-03-25 11:52:13 +0530
committerGitHub <noreply@github.com>2021-03-25 11:52:13 +0530
commit51c91bea1d844544bd17b13c4ee4d2beeacb2e3b (patch)
tree8495dc66de79e3b504092792c43c0711e24ae73f
parent9bd2c697686ec40e2c4f711df961860c8a735baa (diff)
downloadglusterfs-51c91bea1d844544bd17b13c4ee4d2beeacb2e3b.tar.gz
glusterfs-51c91bea1d844544bd17b13c4ee4d2beeacb2e3b.tar.xz
glusterfs-51c91bea1d844544bd17b13c4ee4d2beeacb2e3b.zip
afr: make fsync post-op aware of inodelk count (#2273)
Problem: Since commit bd540db1e, eager-locking was enabled for fsync. But on certain VM workloads wit sharding enabled, shard xlator keeps sending fsync on the base shard. This can cause blocked inodelks from other clients (including shd) to time out due to call bail. Fix: Make afr fsync aware of inodelk count and not delay post-op + unlock when inodelk count > 1, just like writev. Code is restructured so that any fd based AFR_DATA_TRANSACTION can be made aware by setting GLUSTERFS_INODELK_DOM_COUNT in xdata request. Note: We do not know yet why VMs go in to paused state because of the blocked inodelks but this patch should be a first step in reducing the occurence. Updates: #2198 Change-Id: Ib91ebdd3101d590c326e69c829cf9335003e260b Signed-off-by: Ravishankar N <ravishankar@redhat.com>
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.c40
-rw-r--r--xlators/features/locks/src/posix.c1
2 files changed, 24 insertions, 17 deletions
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index 1d6e4f3570..f9ec698c75 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -36,6 +36,7 @@ __afr_inode_write_finalize(call_frame_t *frame, xlator_t *this)
struct iatt *stbuf = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ afr_lock_t *lock = NULL;
afr_read_subvol_args_t args = {
0,
};
@@ -44,6 +45,12 @@ __afr_inode_write_finalize(call_frame_t *frame, xlator_t *this)
priv = this->private;
GF_VALIDATE_OR_GOTO(this->name, local->inode, out);
+ if (local->update_num_inodelks &&
+ local->transaction.type == AFR_DATA_TRANSACTION) {
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ lock->num_inodelks = local->num_inodelks;
+ }
+
/*This code needs to stay till DHT sends fops on linked
* inodes*/
if (!inode_is_linked(local->inode)) {
@@ -128,6 +135,7 @@ __afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ int num_inodelks = 0;
local = frame->local;
priv = this->private;
@@ -140,8 +148,16 @@ __afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
local->replies[child_index].op_ret = op_ret;
local->replies[child_index].op_errno = op_errno;
- if (xdata)
+ if (xdata) {
local->replies[child_index].xdata = dict_ref(xdata);
+ if (dict_get_int32_sizen(xdata, GLUSTERFS_INODELK_COUNT,
+ &num_inodelks) == 0) {
+ if (num_inodelks > local->num_inodelks) {
+ local->num_inodelks = num_inodelks;
+ local->update_num_inodelks = _gf_true;
+ }
+ }
+ }
if (op_ret >= 0) {
if (prebuf)
@@ -277,7 +293,6 @@ afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
afr_local_t *local = frame->local;
uint32_t open_fd_count = 0;
uint32_t write_is_append = 0;
- int32_t num_inodelks = 0;
LOCK(&frame->lock);
{
@@ -299,15 +314,6 @@ afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
local->open_fd_count = open_fd_count;
local->update_open_fd_count = _gf_true;
}
-
- ret = dict_get_int32_sizen(xdata, GLUSTERFS_INODELK_COUNT,
- &num_inodelks);
- if (ret < 0)
- goto unlock;
- if (num_inodelks > local->num_inodelks) {
- local->num_inodelks = num_inodelks;
- local->update_num_inodelks = _gf_true;
- }
}
unlock:
UNLOCK(&frame->lock);
@@ -317,7 +323,6 @@ void
afr_process_post_writev(call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
- afr_lock_t *lock = NULL;
local = frame->local;
@@ -336,11 +341,6 @@ afr_process_post_writev(call_frame_t *frame, xlator_t *this)
if (local->update_open_fd_count)
local->inode_ctx->open_fd_count = local->open_fd_count;
- if (local->update_num_inodelks &&
- local->transaction.type == AFR_DATA_TRANSACTION) {
- lock = &local->inode_ctx->lock[local->transaction.type];
- lock->num_inodelks = local->num_inodelks;
- }
}
int
@@ -2531,6 +2531,12 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
if (!local->xdata_req)
goto out;
+ if (dict_set_str_sizen(local->xdata_req, GLUSTERFS_INODELK_DOM_COUNT,
+ this->name)) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
local->fd = fd_ref(fd);
ret = afr_set_inode_local(this, local, fd->inode);
if (ret)
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
index 2bd4aa2864..8888bc4f61 100644
--- a/xlators/features/locks/src/posix.c
+++ b/xlators/features/locks/src/posix.c
@@ -4959,6 +4959,7 @@ struct xlator_fops fops = {
.rchecksum = pl_rchecksum,
.statfs = pl_statfs,
.fsyncdir = pl_fsyncdir,
+ .fsync = pl_fsync,
.readdir = pl_readdir,
.symlink = pl_symlink,
.link = pl_link,