diff options
| author | Ashish Pandey <aspandey@redhat.com> | 2016-09-20 12:32:28 +0530 |
|---|---|---|
| committer | Xavier Hernandez <xhernandez@datalab.es> | 2016-10-11 02:29:27 -0700 |
| commit | 0fed7e7f0aad9973900c89434f736797d9ace2bd (patch) | |
| tree | b7e02dfd5c6e21a29b5fe122bf911b10c42a611c | |
| parent | 85e959052148ec481823d55c8b91cdee36da2b43 (diff) | |
cluster/ec: Implement heal info with lock
Problem: Currently the heal info command prints every
file/directory whose index is present in the
.glusterfs/indices folder.
Since patch http://review.gluster.org/#/c/13733/ was implemented,
the index of a file that is going through an update fop
is also present in .glusterfs/indices, even
if the fop is successful on all the bricks. If the
heal info command is run at that time, it also displays this
file, which is actually healthy and does not require any heal.
Solution: Take a lock on the file corresponding to each index
and inspect its xattrs to decide whether the file needs heal.
Change-Id: I6361e2813ece369be12d02e74816df4eddb81cfa
BUG: 1366815
Signed-off-by: Ashish Pandey <aspandey@redhat.com>
Reviewed-on: http://review.gluster.org/15543
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Reviewed-by: Xavier Hernandez <xhernandez@datalab.es>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Smoke: Gluster Build System <jenkins@build.gluster.org>
| -rw-r--r-- | libglusterfs/src/syncop-utils.c | 35 | ||||
| -rw-r--r-- | libglusterfs/src/syncop-utils.h | 4 | ||||
| -rw-r--r-- | tests/afr.rc | 5 | ||||
| -rw-r--r-- | tests/basic/ec/ec-background-heals.t | 6 | ||||
| -rw-r--r-- | tests/volume.rc | 5 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heald.c | 27 | ||||
| -rw-r--r-- | xlators/cluster/ec/src/ec-common.h | 3 | ||||
| -rw-r--r-- | xlators/cluster/ec/src/ec-heal.c | 254 | ||||
| -rw-r--r-- | xlators/cluster/ec/src/ec-heald.c | 46 | ||||
| -rw-r--r-- | xlators/cluster/ec/src/ec.c | 10 |
10 files changed, 309 insertions, 86 deletions
diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c index 029f10b058..fa9e6a2876 100644 --- a/libglusterfs/src/syncop-utils.c +++ b/libglusterfs/src/syncop-utils.c @@ -591,3 +591,38 @@ out: return ret; } + +int +syncop_inode_find (xlator_t *this, xlator_t *subvol, + uuid_t gfid, inode_t **inode, + dict_t *xdata, dict_t **rsp_dict) +{ + int ret = 0; + loc_t loc = {0, }; + struct iatt iatt = {0, }; + *inode = NULL; + + *inode = inode_find (this->itable, gfid); + if (*inode) + goto out; + + loc.inode = inode_new (this->itable); + if (!loc.inode) { + ret = -ENOMEM; + goto out; + } + gf_uuid_copy (loc.gfid, gfid); + + ret = syncop_lookup (subvol, &loc, &iatt, NULL, xdata, rsp_dict); + if (ret < 0) + goto out; + + *inode = inode_link (loc.inode, NULL, NULL, &iatt); + if (!*inode) { + ret = -ENOMEM; + goto out; + } +out: + loc_wipe (&loc); + return ret; +} diff --git a/libglusterfs/src/syncop-utils.h b/libglusterfs/src/syncop-utils.h index 3968d758c6..4761371c12 100644 --- a/libglusterfs/src/syncop-utils.h +++ b/libglusterfs/src/syncop-utils.h @@ -43,4 +43,8 @@ syncop_ftw_throttle (xlator_t *subvol, loc_t *loc, int pid, void *data, int (*fn) (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, void *data), int count, int sleep_time); +int +syncop_inode_find (xlator_t *this, xlator_t *subvol, + uuid_t gfid, inode_t **inode, + dict_t *xdata, dict_t **rsp_dict); #endif /* _SYNCOP_H */ diff --git a/tests/afr.rc b/tests/afr.rc index ed376f0b41..bdf4075a23 100644 --- a/tests/afr.rc +++ b/tests/afr.rc @@ -85,11 +85,6 @@ function is_file_heal_done { #count the number of entries marked for self-heal #in brick $1's index -function count_sh_entries() -{ - ls $1/.glusterfs/indices/xattrop | grep -v "xattrop-" | wc -l -} - function count_index_entries() { ls $1/.glusterfs/indices/xattrop | wc -l diff --git a/tests/basic/ec/ec-background-heals.t b/tests/basic/ec/ec-background-heals.t index 7ac6c0efc1..eb434908ba 100644 --- 
a/tests/basic/ec/ec-background-heals.t +++ b/tests/basic/ec/ec-background-heals.t @@ -23,7 +23,10 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "0" mount_get_option_value $M0 $V0-disperse-0 background-heals EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "0" mount_get_option_value $M0 $V0-disperse-0 heal-wait-qlength TEST touch $M0/a -EXPECT_WITHIN $CHILD_UP_TIMEOUT "0" get_pending_heal_count $V0 #One for each active brick +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" count_sh_entries $B0/${V0}0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" count_sh_entries $B0/${V0}1 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" count_sh_entries $B0/${V0}2 + TEST kill_brick $V0 $H0 $B0/${V0}2 echo abc > $M0/a EXPECT 2 get_pending_heal_count $V0 #One for each active brick @@ -31,7 +34,6 @@ $CLI volume start $V0 force EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 #Accessing file shouldn't heal the file EXPECT "abc" cat $M0/a -sleep 3 EXPECT 2 get_pending_heal_count $V0 #One for each active brick TEST $CLI volume set $V0 disperse.background-heals 1 EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-disperse-0 background-heals diff --git a/tests/volume.rc b/tests/volume.rc index f9725b2cba..8cad822e9a 100644 --- a/tests/volume.rc +++ b/tests/volume.rc @@ -710,3 +710,8 @@ function get_hard_link_count { local path=$1; stat -c %h $path } + +function count_sh_entries() +{ + ls $1/.glusterfs/indices/xattrop | grep -v "xattrop-" | wc -l +} diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index 8a3a552140..e1a4052170 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -153,22 +153,11 @@ unlock: inode_t * afr_shd_inode_find (xlator_t *this, xlator_t *subvol, uuid_t gfid) { - int ret = 0; - uint64_t val = IA_INVAL; - loc_t loc = {0, }; + int ret = 0; + uint64_t val = IA_INVAL; dict_t *xdata = NULL; dict_t *rsp_dict = NULL; - inode_t *inode = NULL; - 
struct iatt iatt = {0, }; - - inode = inode_find (this->itable, gfid); - if (inode) - goto out; - - loc.inode = inode_new (this->itable); - if (!loc.inode) - goto out; - gf_uuid_copy (loc.gfid, gfid); + inode_t *inode = NULL; xdata = dict_new (); if (!xdata) @@ -178,7 +167,8 @@ afr_shd_inode_find (xlator_t *this, xlator_t *subvol, uuid_t gfid) if (ret) goto out; - ret = syncop_lookup (subvol, &loc, &iatt, NULL, xdata, &rsp_dict); + ret = syncop_inode_find (this, subvol, gfid, &inode, + xdata, &rsp_dict); if (ret < 0) goto out; @@ -188,15 +178,16 @@ afr_shd_inode_find (xlator_t *this, xlator_t *subvol, uuid_t gfid) if (ret) goto out; } - - inode = inode_link (loc.inode, NULL, NULL, &iatt); ret = inode_ctx_set2 (inode, subvol, 0, &val); out: + if (ret && inode) { + inode_unref (inode); + inode = NULL; + } if (xdata) dict_unref (xdata); if (rsp_dict) dict_unref (rsp_dict); - loc_wipe (&loc); return inode; } diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h index d720d24adc..5851b5d57b 100644 --- a/xlators/cluster/ec/src/ec-common.h +++ b/xlators/cluster/ec/src/ec-common.h @@ -118,4 +118,7 @@ void ec_manager(ec_fop_data_t * fop, int32_t error); gf_boolean_t ec_is_recoverable_error (int32_t op_errno); void ec_handle_healers_done (ec_fop_data_t *fop); +int32_t +ec_get_heal_info (xlator_t *this, loc_t *loc, dict_t **dict); + #endif /* __EC_COMMON_H__ */ diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c index 1425561683..bac8337cd3 100644 --- a/xlators/cluster/ec/src/ec-heal.c +++ b/xlators/cluster/ec/src/ec-heal.c @@ -1489,22 +1489,29 @@ unlock: return ret; } -/*Data heal*/ +/*Find direction for data heal and heal info*/ int ec_heal_data_find_direction (ec_t *ec, default_args_cbk_t *replies, - uint64_t *versions, uint64_t *dirty, - uint64_t *size, unsigned char *sources, - unsigned char *healed_sinks) + uint64_t *data_versions, uint64_t *meta_versions, + uint64_t *dirty, uint64_t *size, unsigned char 
*sources, + unsigned char *healed_sinks, int which) { uint64_t xattr[EC_VERSION_SIZE] = {0}; - char version_size[64] = {0}; + char version_size[128] = {0}; dict_t *version_size_db = NULL; + uint64_t *m_versions = NULL; unsigned char *same = NULL; int max_same_count = 0; int source = 0; int i = 0; int ret = 0; + dict_t *dict = NULL; + if (!meta_versions) { + m_versions = alloca0 (ec->nodes * sizeof (*m_versions)); + } else { + m_versions = meta_versions; + } version_size_db = dict_new (); if (!version_size_db) { ret = -ENOMEM; @@ -1516,23 +1523,31 @@ ec_heal_data_find_direction (ec_t *ec, default_args_cbk_t *replies, continue; if (replies[i].op_ret < 0) continue; - ret = ec_dict_del_array (replies[i].xattr, EC_XATTR_VERSION, + dict = (which == EC_COMBINE_XDATA) ? replies[i].xdata : + replies[i].xattr; + + ret = ec_dict_del_array (dict, EC_XATTR_VERSION, xattr, EC_VERSION_SIZE); if (ret == 0) { - versions[i] = xattr[EC_DATA_TXN]; + data_versions[i] = xattr[EC_DATA_TXN]; + if (meta_versions) { + m_versions[i] = xattr[EC_METADATA_TXN]; + } } memset (xattr, 0, sizeof (xattr)); - ret = ec_dict_del_array (replies[i].xattr, EC_XATTR_DIRTY, + ret = ec_dict_del_array (dict, EC_XATTR_DIRTY, xattr, EC_VERSION_SIZE); if (ret == 0) { dirty[i] = xattr[EC_DATA_TXN]; } - ret = ec_dict_del_number (replies[i].xattr, EC_XATTR_SIZE, + ret = ec_dict_del_number (dict, EC_XATTR_SIZE, &size[i]); - /*Build a db of same version, size*/ + /*Build a db of same metadata and data version and size*/ snprintf (version_size, sizeof (version_size), - "%"PRIu64"-%"PRIu64, versions[i], size[i]); + "%"PRIu64"-%"PRIu64"-%"PRIu64, data_versions[i], + m_versions[i], size[i]); + ret = dict_get_bin (version_size_db, version_size, (void **)&same); if (ret < 0) { @@ -1562,7 +1577,11 @@ ec_heal_data_find_direction (ec_t *ec, default_args_cbk_t *replies, goto out; } else { snprintf (version_size, sizeof (version_size), - "%"PRIu64"-%"PRIu64, versions[source], size[source]); + "%"PRIu64"-%"PRIu64"-%"PRIu64, + 
data_versions[source], + m_versions[source], + size[source]); + ret = dict_get_bin (version_size_db, version_size, (void **)&same); if (ret < 0) @@ -1621,8 +1640,9 @@ __ec_heal_data_prepare (call_frame_t *frame, ec_t *ec, fd_t *fd, goto out; } - source = ec_heal_data_find_direction (ec, replies, versions, dirty, - size, sources, healed_sinks); + source = ec_heal_data_find_direction (ec, replies, versions, NULL, + dirty, size, sources, + healed_sinks, EC_COMBINE_DICT); ret = source; if (ret < 0) goto out; @@ -2602,7 +2622,7 @@ out: int32_t ec_launch_replace_heal (ec_t *ec) { - int ret = -1; + int ret = -1; if (!ec) return ret; @@ -2614,3 +2634,207 @@ ec_launch_replace_heal (ec_t *ec) } return ret; } + +int32_t +ec_set_heal_info(dict_t **dict_rsp, char *status) +{ + dict_t *dict = NULL; + int ret = 0; + + dict = dict_new (); + if (!dict) { + ret = -ENOMEM; + goto out; + } + ret = dict_set_str (dict, "heal-info", status); + if (ret) { + gf_msg (THIS->name, GF_LOG_WARNING, -ret, + EC_MSG_HEAL_FAIL, + "Failed to set heal-info key to " + "%s", status); + dict_unref(dict); + dict = NULL; + } + *dict_rsp = dict; +out: + return ret; +} + +int32_t +ec_need_heal (ec_t *ec, default_args_cbk_t *replies, gf_boolean_t *need_heal) +{ + uint64_t *dirty = NULL; + unsigned char *sources = NULL; + unsigned char *healed_sinks = NULL; + uint64_t *data_versions = NULL; + uint64_t *meta_versions = NULL; + uint64_t *size = NULL; + int ret = 0; + int source_count = 0; + + sources = alloca0(ec->nodes); + healed_sinks = alloca0(ec->nodes); + dirty = alloca0 (ec->nodes * sizeof (*dirty)); + size = alloca0 (ec->nodes * sizeof (*size)); + data_versions = alloca0 (ec->nodes * sizeof (*data_versions)); + meta_versions = alloca0 (ec->nodes * sizeof (*meta_versions)); + + ret = ec_heal_data_find_direction (ec, replies, data_versions, + meta_versions, dirty, size, + sources, healed_sinks, + EC_COMBINE_XDATA); + if (ret < 0 && ret != -EIO) { + goto out; + } + source_count = EC_COUNT (sources, 
ec->nodes); + if (source_count != ec->nodes) { + *need_heal = _gf_true; + } + ret = source_count; +out: + return ret; +} + +int32_t +ec_heal_inspect (call_frame_t *frame, ec_t *ec, + inode_t *inode, unsigned char *locked_on, + gf_boolean_t *need_heal) +{ + loc_t loc = {0}; + int ret = 0; + dict_t *xdata = NULL; + uint64_t zero_array[2] = {0}; + uint64_t zero_value = 0; + unsigned char *output = NULL; + default_args_cbk_t *replies = NULL; + + EC_REPLIES_ALLOC (replies, ec->nodes); + output = alloca0 (ec->nodes); + + loc.inode = inode_ref (inode); + gf_uuid_copy (loc.gfid, inode->gfid); + + xdata = dict_new (); + if (!xdata || + dict_set_static_bin (xdata, EC_XATTR_VERSION, zero_array, + sizeof (zero_array)) || + dict_set_static_bin (xdata, EC_XATTR_DIRTY, zero_array, + sizeof (zero_array)) || + dict_set_static_bin (xdata, EC_XATTR_SIZE, &zero_value, + sizeof (zero_value))) { + ret = -ENOMEM; + goto out; + } + ret = cluster_lookup (ec->xl_list, locked_on, ec->nodes, replies, + output, frame, ec->xl, &loc, xdata); + if (ret != ec->nodes) { + ret = ec->nodes; + *need_heal = _gf_true; + goto out; + } + ret = ec_need_heal (ec, replies, need_heal); + +out: + cluster_replies_wipe (replies, ec->nodes); + loc_wipe (&loc); + if (xdata) { + dict_unref(xdata); + } + return ret; +} + +int32_t +ec_heal_locked_inspect (call_frame_t *frame, ec_t *ec, inode_t *inode, + gf_boolean_t *need_heal) +{ + unsigned char *locked_on = NULL; + unsigned char *up_subvols = NULL; + unsigned char *output = NULL; + default_args_cbk_t *replies = NULL; + int ret = 0; + + EC_REPLIES_ALLOC (replies, ec->nodes); + locked_on = alloca0(ec->nodes); + output = alloca0(ec->nodes); + up_subvols = alloca0(ec->nodes); + ec_mask_to_char_array (ec->xl_up, up_subvols, ec->nodes); + + ret = cluster_inodelk (ec->xl_list, up_subvols, ec->nodes, + replies, locked_on, frame, ec->xl, + ec->xl->name, inode, 0, 0); + if (ret != ec->nodes) { + *need_heal = _gf_true; + goto unlock; + } + ret = ec_heal_inspect (frame, ec, 
inode, + locked_on, need_heal); +unlock: + cluster_uninodelk (ec->xl_list, locked_on, ec->nodes, + replies, output, frame, ec->xl, + ec->xl->name, inode, 0, 0); + cluster_replies_wipe (replies, ec->nodes); + return ret; +} + +int32_t +ec_get_heal_info (xlator_t *this, loc_t *entry_loc, dict_t **dict_rsp) +{ + int ret = -ENOMEM; + gf_boolean_t need_heal = _gf_false; + call_frame_t *frame = NULL; + ec_t *ec = NULL; + unsigned char *up_subvols = NULL; + loc_t loc = {0, }; + + VALIDATE_OR_GOTO(this, out); + GF_VALIDATE_OR_GOTO(this->name, entry_loc, out); + + ec = this->private; + up_subvols = alloca0(ec->nodes); + ec_mask_to_char_array (ec->xl_up, up_subvols, ec->nodes); + + frame = create_frame (this, this->ctx->pool); + if (!frame) { + goto out; + } + ec_owner_set(frame, frame->root); + frame->root->uid = 0; + frame->root->gid = 0; + frame->root->pid = GF_CLIENT_PID_SELF_HEALD; + + if (loc_copy(&loc, entry_loc) != 0) { + gf_msg (this->name, GF_LOG_ERROR, + ENOMEM, EC_MSG_LOC_COPY_FAIL, + "Failed to copy a location."); + goto out; + } + if (!loc.inode) { + ret = syncop_inode_find (this, this, loc.gfid, + &loc.inode, NULL, NULL); + if (ret < 0) + goto out; + } + + ret = ec_heal_inspect (frame, ec, loc.inode, up_subvols, + &need_heal); + if (ret == ec->nodes) { + goto set_heal; + } + need_heal = _gf_false; + ret = ec_heal_locked_inspect (frame, ec, loc.inode, + &need_heal); + if (ret < 0) + goto out; +set_heal: + if (need_heal) { + ret = ec_set_heal_info (dict_rsp, "heal"); + } else { + ret = ec_set_heal_info (dict_rsp, "no-heal"); + } +out: + if (frame) { + STACK_DESTROY (frame->root); + } + loc_wipe (&loc); + return ret; +} diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c index c87f328db0..9860f10ead 100644 --- a/xlators/cluster/ec/src/ec-heald.c +++ b/xlators/cluster/ec/src/ec-heald.c @@ -126,42 +126,6 @@ unlock: return ret; } - -int -ec_shd_inode_find (xlator_t *this, xlator_t *subvol, - uuid_t gfid, inode_t **inode) -{ - int ret = 
0; - loc_t loc = {0, }; - struct iatt iatt = {0, }; - *inode = NULL; - - *inode = inode_find (this->itable, gfid); - if (*inode) - goto out; - - loc.inode = inode_new (this->itable); - if (!loc.inode) { - ret = -ENOMEM; - goto out; - } - gf_uuid_copy (loc.gfid, gfid); - - ret = syncop_lookup (subvol, &loc, &iatt, NULL, NULL, NULL); - if (ret < 0) - goto out; - - *inode = inode_link (loc.inode, NULL, NULL, &iatt); - if (!*inode) { - ret = -ENOMEM; - goto out; - } -out: - loc_wipe (&loc); - return ret; -} - - int ec_shd_index_inode (xlator_t *this, xlator_t *subvol, inode_t **inode) { @@ -190,7 +154,8 @@ ec_shd_index_inode (xlator_t *this, xlator_t *subvol, inode_t **inode) gf_msg_debug (this->name, 0, "index-dir gfid for %s: %s", subvol->name, uuid_utoa (index_gfid)); - ret = ec_shd_inode_find (this, subvol, index_gfid, inode); + ret = syncop_inode_find (this, subvol, index_gfid, + inode, NULL, NULL); out: loc_wipe (&rootloc); @@ -250,8 +215,8 @@ ec_shd_index_heal (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, if (ret < 0) goto out; - ret = ec_shd_inode_find (healer->this, healer->this, loc.gfid, - &loc.inode); + ret = syncop_inode_find (healer->this, healer->this, loc.gfid, + &loc.inode, NULL, NULL); if (ret < 0) goto out; @@ -329,7 +294,8 @@ ec_shd_full_heal (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, if (ret < 0) goto out; - ret = ec_shd_inode_find (this, this, loc.gfid, &loc.inode); + ret = syncop_inode_find (this, this, loc.gfid, + &loc.inode, NULL, NULL); if (ret < 0) goto out; diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c index dff5a784b2..e10de4e38a 100644 --- a/xlators/cluster/ec/src/ec.c +++ b/xlators/cluster/ec/src/ec.c @@ -806,13 +806,11 @@ ec_handle_heal_commands (call_frame_t *frame, xlator_t *this, loc_t *loc, if (!name || strcmp (name, GF_HEAL_INFO)) return -1; - dict_rsp = dict_new (); - if (dict_rsp == NULL) - goto out; + op_errno = -ec_get_heal_info (this, loc, &dict_rsp); + if (op_errno <= 0) { + op_errno 
= op_ret = 0; + } - if (dict_set_str (dict_rsp, "heal-info", "heal") == 0) - op_ret = 0; -out: STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict_rsp, NULL); if (dict_rsp) dict_unref (dict_rsp); |
