diff options
author | karthik-us <ksubrahm@redhat.com> | 2017-06-07 15:56:13 +0530 |
---|---|---|
committer | Jeff Darcy <jeff@pl.atyp.us> | 2017-07-18 15:24:54 +0000 |
commit | 657d78dbad118e511e1fca8b1badb9f8ae7a6f60 (patch) | |
tree | 5c2001d6dab7536d8ffeccb1dafc6e5585d7d07c /xlators/cluster/afr/src | |
parent | ae14513eb82929662b11e4c304877030a7d685cd (diff) | |
download | glusterfs-657d78dbad118e511e1fca8b1badb9f8ae7a6f60.tar.gz glusterfs-657d78dbad118e511e1fca8b1badb9f8ae7a6f60.tar.xz glusterfs-657d78dbad118e511e1fca8b1badb9f8ae7a6f60.zip |
cluster/afr: GFID split-brain resolution with existing CLI
Problem:
Currently there is no way for the admin to resolve a gfid
split-brain from the CLI based on a policy such as choice of brick,
mtime or size.
Fix:
With the existing CLI options based on size, mtime, and choice of
brick, we do a lookup on the parent of the specified file. As
part of the lookup, if we find a gfid mismatch, we resolve it
based on the policy and return. If the file is not in gfid
split-brain, we then check for data and metadata split-brain in the
getxattr code path, and resolve any that are found.
This works only when the absolute path to the file is given to the
CLI, not the gfid of the file. Hence the source-brick policy
without a file path will also not resolve gfid split-brain,
since it uses the gfids of the files. It can still resolve the other
types of split-brain, and it skips the gfid mismatch resolution with
the usual error message.
Reverting the change https://review.gluster.org/17290. This patch
resolves the issue.
Fixes gluster/glusterfs#135
Change-Id: Iaeba6fc32f184a34255d03be87cda02773130a09
BUG: 1459530
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://review.gluster.org/17485
Reviewed-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Smoke: Gluster Build System <jenkins@build.gluster.org>
Diffstat (limited to 'xlators/cluster/afr/src')
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 18 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 250 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 107 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-name.c | 65 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal.h | 8 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heald.c | 2 |
6 files changed, 315 insertions, 135 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 064320441b..cba18b2ff8 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -2114,6 +2114,7 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this) int op_errno = 0; int read_subvol = 0; int par_read_subvol = 0; + int ret = -1; unsigned char *readable = NULL; int event = 0; struct afr_reply *replies = NULL; @@ -2124,6 +2125,7 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this) int spb_choice = -1; ia_type_t ia_type = IA_INVAL; afr_read_subvol_args_t args = {0,}; + char *gfid_heal_msg = NULL; priv = this->private; local = frame->local; @@ -2258,6 +2260,19 @@ unwind: local->op_errno = ENOTCONN; } + ret = dict_get_str (local->xattr_req, "gfid-heal-msg", &gfid_heal_msg); + if (!ret) { + ret = dict_set_str (local->replies[read_subvol].xdata, + "gfid-heal-msg", gfid_heal_msg); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + AFR_MSG_DICT_SET_FAILED, + "Error setting gfid-heal-msg dict"); + local->op_ret = -1; + local->op_errno = ENOMEM; + } + } + AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, local->inode, &local->replies[read_subvol].poststat, local->replies[read_subvol].xdata, @@ -2520,7 +2535,7 @@ afr_lookup_selfheal_wrap (void *opaque) loc_pargfid (&local->loc, pargfid); ret = afr_selfheal_name (frame->this, pargfid, local->loc.name, - &local->cont.lookup.gfid_req); + &local->cont.lookup.gfid_req, local->xattr_req); if (ret == -EIO) goto unwind; @@ -2581,6 +2596,7 @@ afr_lookup_entry_heal (call_frame_t *frame, xlator_t *this) } if (need_heal) { + heal = copy_frame (frame); if (heal) heal->root->pid = GF_CLIENT_PID_SELF_HEALD; diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 6b5e50d6c5..9ecd63ce10 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -20,6 +20,256 @@ void 
afr_heal_synctask (xlator_t *this, afr_local_t *local); int +afr_gfid_sbrain_source_from_src_brick (xlator_t *this, + struct afr_reply *replies, + char *src_brick) +{ + int i = 0; + afr_private_t *priv = NULL; + + priv = this->private; + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid || replies[i].op_ret == -1) + continue; + if (strcmp (priv->children[i]->name, src_brick) == 0) + return i; + } + return -1; +} + +int +afr_selfheal_gfid_mismatch_by_majority (struct afr_reply *replies, + int child_count) +{ + int j = 0; + int i = 0; + int src = -1; + int votes[child_count]; + + for (i = 0; i < child_count; i++) { + if (!replies[i].valid || replies[i].op_ret == -1) + continue; + + votes[i] = 1; + for (j = i+1; j < child_count; j++) { + if ((!gf_uuid_compare (replies[i].poststat.ia_gfid, + replies[j].poststat.ia_gfid))) + votes[i]++; + if (votes[i] > child_count / 2) { + src = i; + goto out; + } + } + } + +out: + return src; +} + +int afr_gfid_sbrain_source_from_bigger_file (struct afr_reply *replies, + int child_count) +{ + int i = 0; + int src = -1; + uint64_t size = 0; + + for (i = 0; i < child_count; i++) { + if (!replies[i].valid || replies[i].op_ret == -1) + continue; + if (size < replies[i].poststat.ia_size) { + src = i; + size = replies[i].poststat.ia_size; + } else if (replies[i].poststat.ia_size == size) { + src = -1; + } + } + return src; +} + +int afr_gfid_sbrain_source_from_latest_mtime (struct afr_reply *replies, + int child_count) +{ + int i = 0; + int src = -1; + uint32_t mtime = 0; + uint32_t mtime_nsec = 0; + + for (i = 0; i < child_count; i++) { + if (!replies[i].valid || replies[i].op_ret != 0) + continue; + if ((mtime < replies[i].poststat.ia_mtime) || + ((mtime == replies[i].poststat.ia_mtime) && + (mtime_nsec < replies[i].poststat.ia_mtime_nsec))) { + src = i; + mtime = replies[i].poststat.ia_mtime; + mtime_nsec = replies[i].poststat.ia_mtime_nsec; + } else if ((mtime == replies[i].poststat.ia_mtime) && + (mtime_nsec == 
replies[i].poststat.ia_mtime_nsec)) { + src = -1; + } + } + return src; +} + +int +afr_gfid_split_brain_source (xlator_t *this, struct afr_reply *replies, + inode_t *inode, uuid_t pargfid, const char *bname, + int src_idx, int child_idx, + unsigned char *locked_on, int *src, dict_t *xdata) +{ + afr_private_t *priv = NULL; + char g1[64] = {0,}; + char g2[64] = {0,}; + int up_count = 0; + int heal_op = -1; + int ret = -1; + char *src_brick = NULL; + + *src = -1; + priv = this->private; + up_count = AFR_COUNT (locked_on, priv->child_count); + if (up_count != priv->child_count) { + gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + "All the bricks should be up to resolve the gfid split " + "barin"); + if (xdata) { + ret = dict_set_str (xdata, "gfid-heal-msg", "All the " + "bricks should be up to resolve the" + " gfid split barin"); + if (ret) + gf_msg (this->name, GF_LOG_ERROR, 0, + AFR_MSG_DICT_SET_FAILED, "Error setting" + " gfid-heal-msg dict"); + } + goto out; + } + + if (xdata) { + ret = dict_get_int32 (xdata, "heal-op", &heal_op); + if (ret) + goto fav_child; + } else { + goto fav_child; + } + + switch (heal_op) { + case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE: + *src = afr_gfid_sbrain_source_from_bigger_file (replies, + priv->child_count); + if (*src == -1) { + gf_msg (this->name, GF_LOG_ERROR, 0, + AFR_MSG_SPLIT_BRAIN, "No bigger file"); + if (xdata) { + ret = dict_set_str (xdata, "gfid-heal-msg", + "No bigger file"); + if (ret) + gf_msg (this->name, GF_LOG_ERROR, 0, + AFR_MSG_DICT_SET_FAILED, "Error" + " setting gfid-heal-msg dict"); + } + } + break; + + case GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME: + *src = afr_gfid_sbrain_source_from_latest_mtime (replies, + priv->child_count); + if (*src == -1) { + gf_msg (this->name, GF_LOG_ERROR, 0, + AFR_MSG_SPLIT_BRAIN, "No difference in mtime"); + if (xdata) { + ret = dict_set_str (xdata, "gfid-heal-msg", + "No difference in mtime"); + if (ret) + gf_msg (this->name, GF_LOG_ERROR, 0, + AFR_MSG_DICT_SET_FAILED, 
"Error" + "setting gfid-heal-msg dict"); + } + } + break; + + case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK: + ret = dict_get_str (xdata, "child-name", &src_brick); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + AFR_MSG_SPLIT_BRAIN, "Error getting the source " + "brick"); + break; + } + *src = afr_gfid_sbrain_source_from_src_brick (this, replies, + src_brick); + if (*src == -1) { + gf_msg (this->name, GF_LOG_ERROR, 0, + AFR_MSG_SPLIT_BRAIN, "Error getting the source " + "brick"); + if (xdata) { + ret = dict_set_str (xdata, "gfid-heal-msg", + "Error getting the source " + "brick"); + if (ret) + gf_msg (this->name, GF_LOG_ERROR, 0, + AFR_MSG_DICT_SET_FAILED, "Error" + " setting gfid-heal-msg dict"); + } + } + break; + + default: + break; + } + goto out; + +fav_child: + switch (priv->fav_child_policy) { + case AFR_FAV_CHILD_BY_SIZE: + *src = afr_sh_fav_by_size (this, replies, inode); + break; + case AFR_FAV_CHILD_BY_MTIME: + *src = afr_sh_fav_by_mtime (this, replies, inode); + break; + case AFR_FAV_CHILD_BY_CTIME: + *src = afr_sh_fav_by_ctime(this, replies, inode); + break; + case AFR_FAV_CHILD_BY_MAJORITY: + if (priv->child_count != 2) + *src = afr_selfheal_gfid_mismatch_by_majority (replies, + priv->child_count); + else + *src = -1; + + if (*src == -1) { + gf_msg (this->name, GF_LOG_ERROR, 0, + AFR_MSG_SPLIT_BRAIN, "No majority to resolve " + "gfid split brain"); + } + break; + default: + break; + } + +out: + if (*src == -1) { + gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + "Gfid mismatch detected for <gfid:%s>/%s>, %s on %s and" + " %s on %s.", uuid_utoa (pargfid), bname, + uuid_utoa_r (replies[child_idx].poststat.ia_gfid, g1), + priv->children[child_idx]->name, + uuid_utoa_r (replies[src_idx].poststat.ia_gfid, g2), + priv->children[src_idx]->name); + gf_event (EVENT_AFR_SPLIT_BRAIN, "subvol=%s;type=gfid;file=" + "<gfid:%s>/%s>;count=2;child-%d=%s;gfid-%d=%s;" + "child-%d=%s;gfid-%d=%s", this->name, + uuid_utoa (pargfid), bname, child_idx, + 
priv->children[child_idx]->name, child_idx, + uuid_utoa_r (replies[child_idx].poststat.ia_gfid, g1), + src_idx, priv->children[src_idx]->name, src_idx, + uuid_utoa_r (replies[src_idx].poststat.ia_gfid, g2)); + return -1; + } + return 0; +} + + +int afr_selfheal_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) { diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 82ae6432d7..d7e9e60a7b 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -17,105 +17,6 @@ #include "syncop-utils.h" #include "events.h" -int -afr_selfheal_gfid_mismatch_by_majority (struct afr_reply *replies, - int child_count) -{ - int j = 0; - int i = 0; - int src = -1; - int votes[child_count]; - - for (i = 0; i < child_count; i++) { - if (!replies[i].valid || replies[i].op_ret == -1) - continue; - - votes[i] = 1; - for (j = i+1; j < child_count; j++) { - if ((!gf_uuid_compare (replies[i].poststat.ia_gfid, - replies[j].poststat.ia_gfid))) - votes[i]++; - if (votes[i] > child_count / 2) { - src = i; - goto out; - } - } - } - -out: - return src; -} - -int -afr_gfid_split_brain_source (xlator_t *this, struct afr_reply *replies, - inode_t *inode, uuid_t pargfid, char *bname, - int src_idx, int child_idx, - unsigned char *locked_on, int *src) -{ - afr_private_t *priv = NULL; - char g1[64] = {0,}; - char g2[64] = {0,}; - int up_count = 0; - - priv = this->private; - up_count = AFR_COUNT (locked_on, priv->child_count); - if (up_count != priv->child_count) { - gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_SPLIT_BRAIN, - "All the bricks should be up to resolve the gfid split " - "brain"); - goto out; - } - switch (priv->fav_child_policy) { - case AFR_FAV_CHILD_BY_SIZE: - *src = afr_sh_fav_by_size (this, replies, inode); - break; - case AFR_FAV_CHILD_BY_MTIME: - *src = afr_sh_fav_by_mtime (this, replies, inode); - break; 
- case AFR_FAV_CHILD_BY_CTIME: - *src = afr_sh_fav_by_ctime(this, replies, inode); - break; - case AFR_FAV_CHILD_BY_MAJORITY: - if (priv->child_count != 2) - *src = afr_selfheal_gfid_mismatch_by_majority (replies, - priv->child_count); - else - *src = -1; - - if (*src == -1) { - gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_SPLIT_BRAIN, "No majority to resolve " - "gfid split brain"); - } - break; - default: - break; - } - -out: - if (*src == -1) { - gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, - "Gfid mismatch detected for <gfid:%s>/%s>, %s on %s and" - " %s on %s. Skipping conservative merge on the file.", - uuid_utoa (pargfid), bname, - uuid_utoa_r (replies[child_idx].poststat.ia_gfid, g1), - priv->children[child_idx]->name, - uuid_utoa_r (replies[src_idx].poststat.ia_gfid, g2), - priv->children[src_idx]->name); - gf_event (EVENT_AFR_SPLIT_BRAIN, "subvol=%s;type=gfid;file=" - "<gfid:%s>/%s>;count=2;child-%d=%s;gfid-%d=%s;" - "child-%d=%s;gfid-%d=%s", this->name, - uuid_utoa (pargfid), bname, child_idx, - priv->children[child_idx]->name, child_idx, - uuid_utoa_r (replies[child_idx].poststat.ia_gfid, g1), - src_idx, priv->children[src_idx]->name, src_idx, - uuid_utoa_r (replies[src_idx].poststat.ia_gfid, g2)); - return -1; - } - return 0; -} - static int afr_selfheal_entry_delete (xlator_t *this, inode_t *dir, const char *name, inode_t *inode, int child, struct afr_reply *replies) @@ -332,7 +233,13 @@ afr_selfheal_detect_gfid_and_type_mismatch (xlator_t *this, ret = afr_gfid_split_brain_source (this, replies, inode, pargfid, bname, src_idx, i, - locked_on, src); + locked_on, src, + NULL); + if (ret) + gf_msg (this->name, GF_LOG_ERROR, 0, + AFR_MSG_SPLIT_BRAIN, + "Skipping conservative merge on the " + "file."); return ret; } diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c index 8372cb6e37..1d198a8883 100644 --- a/xlators/cluster/afr/src/afr-self-heal-name.c +++ 
b/xlators/cluster/afr/src/afr-self-heal-name.c @@ -330,14 +330,15 @@ static int afr_selfheal_name_gfid_mismatch_check (xlator_t *this, struct afr_reply *replies, int source, unsigned char *sources, int *gfid_idx, uuid_t pargfid, - const char *bname) + const char *bname, inode_t *inode, + unsigned char *locked_on, dict_t *xdata) { int i = 0; int gfid_idx_iter = -1; + int ret = -1; void *gfid = NULL; void *gfid1 = NULL; afr_private_t *priv = NULL; - char g1[64], g2[64]; priv = this->private; @@ -358,31 +359,29 @@ afr_selfheal_name_gfid_mismatch_check (xlator_t *this, struct afr_reply *replies if (sources[i] || source == -1) { if ((sources[gfid_idx_iter] || source == -1) && gf_uuid_compare (gfid, gfid1)) { - gf_msg (this->name, GF_LOG_WARNING, 0, - AFR_MSG_SPLIT_BRAIN, - "GFID mismatch for <gfid:%s>/%s " - "%s on %s and %s on %s", - uuid_utoa (pargfid), bname, - uuid_utoa_r (gfid1, g1), - priv->children[i]->name, - uuid_utoa_r (gfid, g2), - priv->children[gfid_idx_iter]->name); - gf_event (EVENT_AFR_SPLIT_BRAIN, - "subvol=%s;type=gfid;" - "file=<gfid:%s>/%s;count=2;" - "child-%d=%s;gfid-%d=%s;child-%d=%s;" - "gfid-%d=%s", this->name, - uuid_utoa (pargfid), bname, i, - priv->children[i]->name, i, - uuid_utoa_r (gfid1, g1), - gfid_idx_iter, - priv->children[gfid_idx_iter]->name, - gfid_idx_iter, - uuid_utoa_r (gfid, g2)); - - return -EIO; + ret = afr_gfid_split_brain_source (this, + replies, + inode, + pargfid, + bname, + gfid_idx_iter, + i, locked_on, + gfid_idx, + xdata); + if (!ret && *gfid_idx >= 0) { + ret = dict_set_str (xdata, + "gfid-heal-msg", + "GFID split-brain " + "resolved"); + if (ret) + gf_msg (this->name, + GF_LOG_ERROR, 0, + AFR_MSG_DICT_SET_FAILED, + "Error setting gfid-" + "heal-msg dict"); + } + return ret; } - gfid = &replies[i].poststat.ia_gfid; gfid_idx_iter = i; } @@ -427,7 +426,7 @@ __afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent, unsigned char *sources, unsigned char *sinks, unsigned char *healed_sinks, int source, 
unsigned char *locked_on, struct afr_reply *replies, - void *gfid_req) + void *gfid_req, dict_t *xdata) { int gfid_idx = -1; int ret = -1; @@ -458,7 +457,8 @@ __afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent, ret = afr_selfheal_name_gfid_mismatch_check (this, replies, source, sources, &gfid_idx, - pargfid, bname); + pargfid, bname, inode, + locked_on, xdata); if (ret) return ret; @@ -583,7 +583,8 @@ out: int afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent, - uuid_t pargfid, const char *bname, void *gfid_req) + uuid_t pargfid, const char *bname, void *gfid_req, + dict_t *xdata) { afr_private_t *priv = NULL; unsigned char *sources = NULL; @@ -640,7 +641,7 @@ afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent, ret = __afr_selfheal_name_do (frame, this, parent, pargfid, bname, inode, sources, sinks, healed_sinks, source, locked_on, - replies, gfid_req); + replies, gfid_req, xdata); } unlock: afr_selfheal_unentrylk (frame, this, parent, this->name, bname, @@ -707,7 +708,7 @@ afr_selfheal_name_unlocked_inspect (call_frame_t *frame, xlator_t *this, int afr_selfheal_name (xlator_t *this, uuid_t pargfid, const char *bname, - void *gfid_req) + void *gfid_req, dict_t *xdata) { inode_t *parent = NULL; call_frame_t *frame = NULL; @@ -729,7 +730,7 @@ afr_selfheal_name (xlator_t *this, uuid_t pargfid, const char *bname, if (need_heal) { ret = afr_selfheal_name_do (frame, this, parent, pargfid, bname, - gfid_req); + gfid_req, xdata); if (ret) goto out; } diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index 2e22ac2d7a..36f081ec35 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -99,7 +99,7 @@ afr_throttled_selfheal (call_frame_t *frame, xlator_t *this); int afr_selfheal_name (xlator_t *this, uuid_t gfid, const char *name, - void *gfid_req); + void *gfid_req, dict_t *xdata); int afr_selfheal_data 
(call_frame_t *frame, xlator_t *this, inode_t *inode); @@ -330,4 +330,10 @@ int afr_sh_fav_by_ctime (xlator_t *this, struct afr_reply *replies, inode_t *inode); +int +afr_gfid_split_brain_source (xlator_t *this, struct afr_reply *replies, + inode_t *inode, uuid_t pargfid, const char *bname, + int src_idx, int child_idx, + unsigned char *locked_on, int *src, dict_t *xdata); + #endif /* !_AFR_SELFHEAL_H */ diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index e1a4052170..08817202b3 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -301,7 +301,7 @@ afr_shd_selfheal_name (struct subvol_healer *healer, int child, uuid_t parent, { int ret = -1; - ret = afr_selfheal_name (THIS, parent, bname, NULL); + ret = afr_selfheal_name (THIS, parent, bname, NULL, NULL); return ret; } |