summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRavishankar N <ravishankar@redhat.com>2020-11-26 20:28:13 +0530
committerGitHub <noreply@github.com>2020-11-26 20:28:13 +0530
commit31fd279699b67550f56454379a91648e1c3b670a (patch)
tree945821a80bdf018ab1bff646ed93f12ef8ba3da2
parent177563263ddc0bb69b1ae8debacc900cab9fdac4 (diff)
downloadglusterfs-31fd279699b67550f56454379a91648e1c3b670a.tar.gz
glusterfs-31fd279699b67550f56454379a91648e1c3b670a.tar.xz
glusterfs-31fd279699b67550f56454379a91648e1c3b670a.zip
afr: return -EIO for gfid split-brains. (#1819)
Problem: entry-self-heal-anon-dir-off.t was failing occasionally because afr_gfid_split_brain_source() returned -1 instead of -EIO for split-brains, causing the code to proceed to afr_lookup_done(), which in turn succeeded the lookup if there was a parallel client side heal going on. Fix: Return -EIO instead of -1 so that lookp fails. Also, afr_selfheal_name() was using the same dict to get and set values. This could be problematic if the caller passed local->xdata_req, since setting a response in a request dict can lead to bugs.So changed it to use separate request and response dicts. Fixes: #1739 Credits Pranith Karampuri <pranith.karampuri@phonepe.com> Signed-off-by: Ravishankar N <ravishankar@redhat.com> Change-Id: I5cb4c547fb25e6bfc8bec1740f7eb64e1a5ad443
-rw-r--r--xlators/cluster/afr/src/afr-common.c12
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c27
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c8
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-name.c23
-rw-r--r--xlators/cluster/afr/src/afr-self-heal.h5
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.c2
6 files changed, 42 insertions, 35 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 4b708b3aee..89ecc580e4 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -3039,7 +3039,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
/* If we were called from glfsheal and there is still a gfid
* mismatch, succeed the lookup and let glfsheal print the
* response via gfid-heal-msg.*/
- if (!dict_get_str_sizen(local->xattr_req, "gfid-heal-msg",
+ if (!dict_get_str_sizen(local->xattr_rsp, "gfid-heal-msg",
&gfid_heal_msg))
goto cant_interpret;
@@ -3094,7 +3094,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
goto error;
}
- ret = dict_get_str_sizen(local->xattr_req, "gfid-heal-msg", &gfid_heal_msg);
+ ret = dict_get_str_sizen(local->xattr_rsp, "gfid-heal-msg", &gfid_heal_msg);
if (!ret) {
ret = dict_set_str_sizen(local->replies[read_subvol].xdata,
"gfid-heal-msg", gfid_heal_msg);
@@ -3441,9 +3441,12 @@ afr_lookup_selfheal_wrap(void *opaque)
local = frame->local;
this = frame->this;
loc_pargfid(&local->loc, pargfid);
+ if (!local->xattr_rsp)
+ local->xattr_rsp = dict_new();
ret = afr_selfheal_name(frame->this, pargfid, local->loc.name,
- &local->cont.lookup.gfid_req, local->xattr_req);
+ &local->cont.lookup.gfid_req, local->xattr_req,
+ local->xattr_rsp);
if (ret == -EIO)
goto unwind;
@@ -3459,7 +3462,8 @@ afr_lookup_selfheal_wrap(void *opaque)
return 0;
unwind:
- AFR_STACK_UNWIND(lookup, frame, -1, EIO, NULL, NULL, NULL, NULL);
+ AFR_STACK_UNWIND(lookup, frame, -1, EIO, NULL, NULL, local->xattr_rsp,
+ NULL);
return 0;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 7325cef840..5eb42031e0 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -245,7 +245,8 @@ int
afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
inode_t *inode, uuid_t pargfid, const char *bname,
int src_idx, int child_idx,
- unsigned char *locked_on, int *src, dict_t *xdata)
+ unsigned char *locked_on, int *src, dict_t *req,
+ dict_t *rsp)
{
afr_private_t *priv = NULL;
char g1[64] = {
@@ -266,8 +267,8 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
"All the bricks should be up to resolve the gfid split "
"barin");
- if (xdata) {
- ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
+ if (rsp) {
+ ret = dict_set_sizen_str_sizen(rsp, "gfid-heal-msg",
SALL_BRICKS_UP_TO_RESOLVE);
if (ret)
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED,
@@ -277,8 +278,8 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
goto out;
}
- if (xdata) {
- ret = dict_get_int32_sizen(xdata, "heal-op", &heal_op);
+ if (req) {
+ ret = dict_get_int32_sizen(req, "heal-op", &heal_op);
if (ret)
goto fav_child;
} else {
@@ -292,8 +293,8 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
if (*src == -1) {
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
SNO_BIGGER_FILE);
- if (xdata) {
- ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
+ if (rsp) {
+ ret = dict_set_sizen_str_sizen(rsp, "gfid-heal-msg",
SNO_BIGGER_FILE);
if (ret)
gf_msg(this->name, GF_LOG_ERROR, 0,
@@ -310,8 +311,8 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
if (*src == -1) {
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
SNO_DIFF_IN_MTIME);
- if (xdata) {
- ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
+ if (rsp) {
+ ret = dict_set_sizen_str_sizen(rsp, "gfid-heal-msg",
SNO_DIFF_IN_MTIME);
if (ret)
gf_msg(this->name, GF_LOG_ERROR, 0,
@@ -323,7 +324,7 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
break;
case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:
- ret = dict_get_str_sizen(xdata, "child-name", &src_brick);
+ ret = dict_get_str_sizen(req, "child-name", &src_brick);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
"Error getting the source "
@@ -335,8 +336,8 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
if (*src == -1) {
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
SERROR_GETTING_SRC_BRICK);
- if (xdata) {
- ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
+ if (rsp) {
+ ret = dict_set_sizen_str_sizen(rsp, "gfid-heal-msg",
SERROR_GETTING_SRC_BRICK);
if (ret)
gf_msg(this->name, GF_LOG_ERROR, 0,
@@ -400,7 +401,7 @@ out:
uuid_utoa_r(replies[child_idx].poststat.ia_gfid, g1), src_idx,
priv->children[src_idx]->name, src_idx,
uuid_utoa_r(replies[src_idx].poststat.ia_gfid, g2));
- return -1;
+ return -EIO;
}
return 0;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 20b07dd534..a17dd937e1 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -399,7 +399,7 @@ afr_selfheal_detect_gfid_and_type_mismatch(xlator_t *this,
(ia_type == replies[i].poststat.ia_type)) {
ret = afr_gfid_split_brain_source(this, replies, inode, pargfid,
bname, src_idx, i, locked_on, src,
- NULL);
+ NULL, NULL);
if (ret)
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
"Skipping conservative merge on the "
@@ -474,7 +474,7 @@ __afr_selfheal_merge_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
return ret;
/* In case of type mismatch / unable to resolve gfid mismatch on the
- * entry, return -1.*/
+ * entry, return -EIO.*/
ret = afr_selfheal_detect_gfid_and_type_mismatch(
this, replies, inode, fd->inode->gfid, name, source, locked_on, &src);
@@ -905,7 +905,7 @@ afr_selfheal_entry_do_subvol(call_frame_t *frame, xlator_t *this, fd_t *fd,
break;
}
- if (ret == -1) {
+ if (ret == -EIO) {
/* gfid or type mismatch. */
mismatch = _gf_true;
ret = 0;
@@ -1072,7 +1072,7 @@ afr_selfheal_entry_do(call_frame_t *frame, xlator_t *this, fd_t *fd, int source,
else
ret = afr_selfheal_entry_do_subvol(frame, this, fd, i);
- if (ret == -1) {
+ if (ret == -EIO) {
/* gfid or type mismatch. */
mismatch = _gf_true;
ret = 0;
diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
index 834aac86d4..c5ab8d7dac 100644
--- a/xlators/cluster/afr/src/afr-self-heal-name.c
+++ b/xlators/cluster/afr/src/afr-self-heal-name.c
@@ -217,7 +217,8 @@ afr_selfheal_name_gfid_mismatch_check(xlator_t *this, struct afr_reply *replies,
int source, unsigned char *sources,
int *gfid_idx, uuid_t pargfid,
const char *bname, inode_t *inode,
- unsigned char *locked_on, dict_t *xdata)
+ unsigned char *locked_on, dict_t *req,
+ dict_t *rsp)
{
int i = 0;
int gfid_idx_iter = -1;
@@ -245,11 +246,11 @@ afr_selfheal_name_gfid_mismatch_check(xlator_t *this, struct afr_reply *replies,
if (sources[i] || source == -1) {
if ((sources[gfid_idx_iter] || source == -1) &&
gf_uuid_compare(gfid, gfid1)) {
- ret = afr_gfid_split_brain_source(this, replies, inode, pargfid,
- bname, gfid_idx_iter, i,
- locked_on, gfid_idx, xdata);
+ ret = afr_gfid_split_brain_source(
+ this, replies, inode, pargfid, bname, gfid_idx_iter, i,
+ locked_on, gfid_idx, req, rsp);
if (!ret && *gfid_idx >= 0) {
- ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
+ ret = dict_set_sizen_str_sizen(rsp, "gfid-heal-msg",
"GFID split-brain resolved");
if (ret)
gf_msg(this->name, GF_LOG_ERROR, 0,
@@ -303,7 +304,7 @@ __afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
unsigned char *sources, unsigned char *sinks,
unsigned char *healed_sinks, int source,
unsigned char *locked_on, struct afr_reply *replies,
- void *gfid_req, dict_t *xdata)
+ void *gfid_req, dict_t *req, dict_t *rsp)
{
int gfid_idx = -1;
int ret = -1;
@@ -333,7 +334,7 @@ __afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
ret = afr_selfheal_name_gfid_mismatch_check(this, replies, source, sources,
&gfid_idx, pargfid, bname,
- inode, locked_on, xdata);
+ inode, locked_on, req, rsp);
if (ret)
return ret;
@@ -450,7 +451,7 @@ out:
int
afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
uuid_t pargfid, const char *bname, void *gfid_req,
- dict_t *xdata)
+ dict_t *req, dict_t *rsp)
{
afr_private_t *priv = NULL;
unsigned char *sources = NULL;
@@ -505,7 +506,7 @@ afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
ret = __afr_selfheal_name_do(frame, this, parent, pargfid, bname, inode,
sources, sinks, healed_sinks, source,
- locked_on, replies, gfid_req, xdata);
+ locked_on, replies, gfid_req, req, rsp);
}
unlock:
afr_selfheal_unentrylk(frame, this, parent, this->name, bname, locked_on,
@@ -578,7 +579,7 @@ afr_selfheal_name_unlocked_inspect(call_frame_t *frame, xlator_t *this,
int
afr_selfheal_name(xlator_t *this, uuid_t pargfid, const char *bname,
- void *gfid_req, dict_t *xdata)
+ void *gfid_req, dict_t *req, dict_t *rsp)
{
inode_t *parent = NULL;
call_frame_t *frame = NULL;
@@ -600,7 +601,7 @@ afr_selfheal_name(xlator_t *this, uuid_t pargfid, const char *bname,
if (need_heal) {
ret = afr_selfheal_name_do(frame, this, parent, pargfid, bname,
- gfid_req, xdata);
+ gfid_req, req, rsp);
if (ret)
goto out;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index 48e6dbcfb1..bddbce934d 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -126,7 +126,7 @@ afr_throttled_selfheal(call_frame_t *frame, xlator_t *this);
int
afr_selfheal_name(xlator_t *this, uuid_t gfid, const char *name, void *gfid_req,
- dict_t *xdata);
+ dict_t *req, dict_t *rsp);
int
afr_selfheal_data(call_frame_t *frame, xlator_t *this, fd_t *fd);
@@ -356,7 +356,8 @@ int
afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
inode_t *inode, uuid_t pargfid, const char *bname,
int src_idx, int child_idx,
- unsigned char *locked_on, int *src, dict_t *xdata);
+ unsigned char *locked_on, int *src, dict_t *req,
+ dict_t *rsp);
int
afr_mark_source_sinks_if_file_empty(xlator_t *this, unsigned char *sources,
unsigned char *sinks,
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index 71209637f1..93053ae4ee 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -295,7 +295,7 @@ afr_shd_selfheal_name(struct subvol_healer *healer, int child, uuid_t parent,
{
int ret = -1;
- ret = afr_selfheal_name(THIS, parent, bname, NULL, NULL);
+ ret = afr_selfheal_name(THIS, parent, bname, NULL, NULL, NULL);
return ret;
}