diff options
author | Krutika Dhananjay <kdhananj@redhat.com> | 2019-01-24 14:14:39 +0530 |
---|---|---|
committer | Xavi Hernandez <xhernandez@redhat.com> | 2019-01-24 18:14:57 +0000 |
commit | 72922c1fd69191b220f79905a23395c3a87f86ce (patch) | |
tree | 853a020c4659013434ae667d59c00a96fca63f7a /xlators/features | |
parent | 99b3ab0cf3d3389a2ff89c29cfff906cd36693a3 (diff) | |
download | glusterfs-72922c1fd69191b220f79905a23395c3a87f86ce.tar.gz glusterfs-72922c1fd69191b220f79905a23395c3a87f86ce.tar.xz glusterfs-72922c1fd69191b220f79905a23395c3a87f86ce.zip |
features/shard: Ref shard inode while adding to fsync list
PROBLEM:
Lot of the earlier changes in the management of shards in lru, fsync
lists assumed that if a given shard exists in fsync list, it must be
part of lru list as well. This was found to be not true.
Consider this - a file is FALLOCATE'd to a size which would make the
number of participant shards to be greater than the lru list size.
In this case, some of the resolved shards that are to participate in
this fop will be evicted from lru list to give way to the rest of the
shards. And once FALLOCATE completes, these shards are added to fsync
list but without a ref. After the fop completes, these shard inodes
are unref'd and destroyed while their inode ctxs are still part of
fsync list. Now when an FSYNC is called on the base file and the
fsync-list traversed, the client crashes due to illegal memory access.
FIX:
Hold a ref on the shard inode when adding to fsync list as well.
And unref under following conditions:
1. when the shard is evicted from lru list
2. when the base file is fsync'd
3. when the shards are deleted.
Change-Id: Iab460667d091b8388322f59b6cb27ce69299b1b2
fixes: bz#1669077
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Diffstat (limited to 'xlators/features')
-rw-r--r-- | xlators/features/shard/src/shard.c | 30 |
1 files changed, 22 insertions, 8 deletions
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c index abea8dc64a..fa3564a460 100644 --- a/xlators/features/shard/src/shard.c +++ b/xlators/features/shard/src/shard.c @@ -273,6 +273,7 @@ shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, * of the to_fsync_list. */ inode_ref(base_inode); + inode_ref(shard_inode); LOCK(&base_inode->lock); LOCK(&shard_inode->lock); @@ -286,8 +287,10 @@ shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, /* Unref the base inode corresponding to the ref above, if the shard is * found to be already part of the fsync list. */ - if (ret != 0) + if (ret != 0) { inode_unref(base_inode); + inode_unref(shard_inode); + } return ret; } @@ -734,6 +737,10 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this, inode_unlink(lru_inode, priv->dot_shard_inode, block_bname); inode_forget(lru_inode, 0); } else { + /* The following unref corresponds to the ref + * held when the shard was added to fsync list. + */ + inode_unref(lru_inode); fsync_inode = lru_inode; if (lru_base_inode) inode_unref(lru_base_inode); @@ -2932,8 +2939,8 @@ shard_unlink_block_inode(shard_local_t *local, int shard_block_num) shard_priv_t *priv = NULL; shard_inode_ctx_t *ctx = NULL; shard_inode_ctx_t *base_ictx = NULL; - gf_boolean_t unlink_unref_forget = _gf_false; int unref_base_inode = 0; + int unref_shard_inode = 0; this = THIS; priv = this->private; @@ -2958,11 +2965,12 @@ shard_unlink_block_inode(shard_local_t *local, int shard_block_num) list_del_init(&ctx->ilist); priv->inode_count--; unref_base_inode++; + unref_shard_inode++; GF_ASSERT(priv->inode_count >= 0); - unlink_unref_forget = _gf_true; } if (ctx->fsync_needed) { unref_base_inode++; + unref_shard_inode++; list_del_init(&ctx->to_fsync_list); if (base_inode) { __shard_inode_ctx_get(base_inode, this, &base_ictx); @@ -2973,11 +2981,11 @@ shard_unlink_block_inode(shard_local_t *local, int shard_block_num) UNLOCK(&inode->lock); if (base_inode) UNLOCK(&base_inode->lock); - if (unlink_unref_forget) { - inode_unlink(inode, priv->dot_shard_inode, block_bname); - inode_unref(inode); - inode_forget(inode, 0); - } + + inode_unlink(inode, priv->dot_shard_inode, block_bname); + inode_ref_reduce_by_n(inode, unref_shard_inode); + inode_forget(inode, 0); + if (base_inode && unref_base_inode) inode_ref_reduce_by_n(base_inode, unref_base_inode); UNLOCK(&priv->lock); @@ -5793,6 +5801,7 @@ shard_fsync_shards_cbk(call_frame_t *frame, void *cookie, xlator_t *this, shard_inode_ctx_t *ctx = NULL; shard_inode_ctx_t *base_ictx = NULL; inode_t *base_inode = NULL; + gf_boolean_t unref_shard_inode = _gf_false; local = frame->local; base_inode = local->fd->inode; @@ -5826,11 +5835,16 @@ out: if (ctx->fsync_needed != 0) { list_add_tail(&ctx->to_fsync_list, &base_ictx->to_fsync_list); base_ictx->fsync_count++; + } else { + unref_shard_inode = _gf_true; } } UNLOCK(&anon_fd->inode->lock); UNLOCK(&base_inode->lock); } + + if (unref_shard_inode) + inode_unref(anon_fd->inode); if (anon_fd) fd_unref(anon_fd); |