author    shishir gowda <sgowda@redhat.com>    2012-12-07 10:00:15 +0530
committer Anand Avati <avati@redhat.com>       2012-12-11 16:05:50 -0800
commit    448b59e60b0ea5e085769283e89893176b86659f (patch)
tree      bb0d25ff5443ae8e9fcc38d1a98fd0aa6e757c50 /xlators/cluster
parent    1d87098f2df19af5a42ade6b305cbb01b85af8ea (diff)
cluster/distribute: re-set layouts to prevent overlaps
When the subvols-per-directory option is used, adding or removing bricks can cause the layout to be distributed to subvolumes which were not part of the layout before. We need to clean up layouts on the old subvolumes to prevent overlaps. We also need to make sure that layout-cnt is never less than subvolume-cnt.

Change-Id: I00994a092ca0c99aedcc41bd9412d43460f88a04
BUG: 884455
Signed-off-by: shishir gowda <sgowda@redhat.com>
Reviewed-on: http://review.gluster.org/4281
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
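The overlap the message describes can be pictured with a small stand-alone model (illustrative C only; the struct and helper below are invented for this sketch and are not GlusterFS code). Each subvolume stores the directory's hash range as an xattr; a range left behind on a subvolume that no longer participates in the layout overlaps the freshly computed ranges until it is reset to an empty range, which is what the dht_fix_dir_xattr() change below does by writing a dummy layout to the non-participating subvolumes.

/* Toy model of the overlap problem (not GlusterFS code). */
#include <stdio.h>
#include <stdint.h>

struct range { uint32_t start; uint32_t stop; };

static int
overlaps (struct range a, struct range b)
{
        /* an empty range (start == stop == 0) never overlaps anything */
        if ((a.start == 0 && a.stop == 0) || (b.start == 0 && b.stop == 0))
                return 0;
        return a.start <= b.stop && b.start <= a.stop;
}

int
main (void)
{
        /* new layout spread over subvol0 and subvol1 only */
        struct range layout[3] = {
                { 0x00000000, 0x7fffffff },  /* subvol0 */
                { 0x80000000, 0xffffffff },  /* subvol1 */
                { 0x40000000, 0xbfffffff },  /* subvol2: stale range from the old layout */
        };

        printf ("stale subvol2 overlaps subvol0: %d\n",
                overlaps (layout[2], layout[0]));    /* prints 1: overlap */

        /* the fix: write an empty layout to the non-participating subvolume */
        layout[2].start = layout[2].stop = 0;

        printf ("after reset, subvol2 overlaps subvol0: %d\n",
                overlaps (layout[2], layout[0]));    /* prints 0: no overlap */
        return 0;
}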
Diffstat (limited to 'xlators/cluster')
-rw-r--r--  xlators/cluster/dht/src/dht-common.h   |  3
-rw-r--r--  xlators/cluster/dht/src/dht-layout.c   | 13
-rw-r--r--  xlators/cluster/dht/src/dht-selfheal.c | 46
-rw-r--r--  xlators/cluster/dht/src/dht.c          |  1
4 files changed, 55 insertions(+), 8 deletions(-)
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 1f3ccc1cde..83ec345d64 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -263,6 +263,7 @@ struct dht_conf {
/* to keep track of nodes which are decomissioned */
xlator_t **decommissioned_bricks;
int decommission_in_progress;
+ int decommission_subvols_cnt;
/* defrag related */
gf_defrag_info_t *defrag;
@@ -721,4 +722,6 @@ int
dht_dir_attr_heal_done (int ret, call_frame_t *sync_frame, void *data);
int
dht_dir_has_layout (dict_t *xattr);
+gf_boolean_t
+dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator);
#endif/* _DHT_H */
diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c
index ef728209fe..8cae526539 100644
--- a/xlators/cluster/dht/src/dht-layout.c
+++ b/xlators/cluster/dht/src/dht-layout.c
@@ -419,6 +419,19 @@ dht_layout_entry_cmp_volname (dht_layout_t *layout, int i, int j)
layout->list[j].xlator->name));
}
+
+gf_boolean_t
+dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator)
+{
+ int i = 0;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (!strcmp (layout->list[i].xlator->name, xlator->name))
+ return _gf_true;
+ }
+ return _gf_false;
+}
+
int64_t
dht_layout_entry_cmp (dht_layout_t *layout, int i, int j)
{
diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
index 4840034a97..fbe4cab3ea 100644
--- a/xlators/cluster/dht/src/dht-selfheal.c
+++ b/xlators/cluster/dht/src/dht-selfheal.c
@@ -101,7 +101,8 @@ dht_selfheal_dir_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
dht_selfheal_dir_xattr_persubvol (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout, int i)
+ dht_layout_t *layout, int i,
+ xlator_t *req_subvol)
{
xlator_t *subvol = NULL;
dict_t *xattr = NULL;
@@ -112,7 +113,10 @@ dht_selfheal_dir_xattr_persubvol (call_frame_t *frame, loc_t *loc,
local = frame->local;
- subvol = layout->list[i].xlator;
+ if (req_subvol)
+ subvol = req_subvol;
+ else
+ subvol = layout->list[i].xlator;
this = frame->this;
GF_VALIDATE_OR_GOTO ("", this, err);
@@ -179,21 +183,42 @@ dht_fix_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
int i = 0;
int count = 0;
xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *dummy = NULL;
local = frame->local;
this = frame->this;
+ conf = this->private;
gf_log (this->name, GF_LOG_DEBUG,
"writing the new range for all subvolumes");
- local->call_cnt = count = layout->cnt;
+ local->call_cnt = count = conf->subvolume_cnt;
for (i = 0; i < layout->cnt; i++) {
- dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i);
+ dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i, NULL);
if (--count == 0)
- break;
+ goto out;
}
+ /* if we are here, subvolcount > layout_count. subvols-per-directory
+ * option might be set here. We need to clear out layout from the
+ * non-participating subvolumes, else it will result in overlaps */
+ dummy = dht_layout_new (this, 1);
+ if (!dummy)
+ goto out;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (_gf_false ==
+ dht_is_subvol_in_layout (layout, conf->subvolumes[i])) {
+ dht_selfheal_dir_xattr_persubvol (frame, loc, dummy, 0,
+ conf->subvolumes[i]);
+ if (--count == 0)
+ break;
+ }
+ }
+
+ dht_layout_unref (this, dummy);
+out:
return 0;
}
@@ -235,7 +260,7 @@ dht_selfheal_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
if (layout->list[i].err != -1 || !layout->list[i].stop)
continue;
- dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i);
+ dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i, NULL);
if (--missing_xattr == 0)
break;
@@ -524,8 +549,13 @@ dht_get_layout_count (xlator_t *this, dht_layout_t *layout, int new_layout)
}
}
- count = ((layout->spread_cnt) ? layout->spread_cnt :
- ((count) ? count : 1));
+ /* if layout->spread_cnt is set, check if it is <= available
+ * subvolumes (excluding bricks that are being decommissioned). Else
+ * return count */
+ count = ((layout->spread_cnt &&
+ (layout->spread_cnt <=
+ (conf->subvolume_cnt - conf->decommission_subvols_cnt))) ?
+ layout->spread_cnt : ((count) ? count : 1));
return count;
}
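To make the new clamp in dht_get_layout_count() concrete, here is a stand-alone rendition of the expression (same logic and field meanings as the hunk above; the function itself is only an illustration, not part of the patch):

/* returns the number of subvolumes the directory layout should span */
static int
layout_count (int spread_cnt, int counted, int subvolume_cnt,
              int decommission_subvols_cnt)
{
        int usable = subvolume_cnt - decommission_subvols_cnt;

        /* honour subvols-per-directory only while enough usable (not
         * decommissioned) subvolumes remain; otherwise fall back to the
         * counted value, and never return less than 1 */
        return (spread_cnt && spread_cnt <= usable)
                ? spread_cnt
                : (counted ? counted : 1);
}

For example, with 4 subvolumes of which 2 are being decommissioned and subvols-per-directory set to 3, spread_cnt (3) exceeds the 2 usable subvolumes, so the count falls back to the value counted from the layout rather than spreading the directory onto bricks that are on their way out.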
diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c
index d7b1e9655e..d70d713ac7 100644
--- a/xlators/cluster/dht/src/dht.c
+++ b/xlators/cluster/dht/src/dht.c
@@ -262,6 +262,7 @@ dht_parse_decommissioned_bricks (xlator_t *this, dht_conf_t *conf,
if (!strcmp (conf->subvolumes[i]->name, node)) {
conf->decommissioned_bricks[i] =
conf->subvolumes[i];
+ conf->decommission_subvols_cnt++;
gf_log (this->name, GF_LOG_INFO,
"decommissioning subvolume %s",
conf->subvolumes[i]->name);