summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDan Lambright <dlambrig@redhat.com>2015-03-30 14:27:44 -0400
committerKaleb KEITHLEY <kkeithle@redhat.com>2015-04-08 07:28:21 +0000
commita8260044291cb6eee44974d8c52caa9f4cfb3993 (patch)
tree3b9b0f83bd6ae869a26a75dafdc988196a62d049
parentf0cd1d73c63001740cd7691a77df7631c9b8e8dc (diff)
downloadglusterfs-a8260044291cb6eee44974d8c52caa9f4cfb3993.tar.gz
glusterfs-a8260044291cb6eee44974d8c52caa9f4cfb3993.tar.xz
glusterfs-a8260044291cb6eee44974d8c52caa9f4cfb3993.zip
glusterd: Support distributed replicated volumes on hot tier
The volume graph was not set up properly for hot tiers with replicated subvolumes. Also add a check that the file has not already been moved by another replicated brick on the same node. Change-Id: I9adef565ab60f6774810962d912168b77a6032fa BUG: 1206517 Signed-off-by: Dan Lambright <dlambrig@redhat.com> Reviewed-on: http://review.gluster.org/10054 Reviewed-by: Joseph Fernandes <josferna@redhat.com> Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com>
-rw-r--r--cli/src/cli-cmd-parser.c5
-rw-r--r--cli/src/cli-cmd-volume.c7
-rw-r--r--cli/src/cli.h2
-rwxr-xr-xtests/basic/tier/tier.t22
-rw-r--r--xlators/cluster/dht/src/tier.c39
-rw-r--r--xlators/cluster/dht/src/tier.h1
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c7
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c49
8 files changed, 97 insertions, 35 deletions
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
index 7c03ae228f..a334fd931b 100644
--- a/cli/src/cli-cmd-parser.c
+++ b/cli/src/cli-cmd-parser.c
@@ -1441,7 +1441,7 @@ out:
int32_t
cli_cmd_volume_add_brick_parse (const char **words, int wordcount,
- dict_t **options)
+ dict_t **options, int *ret_type)
{
dict_t *dict = NULL;
char *volname = NULL;
@@ -1559,6 +1559,9 @@ parse_bricks:
*options = dict;
out:
+ if (ret_type)
+ *ret_type = type;
+
if (ret) {
gf_log ("cli", GF_LOG_ERROR, "Unable to parse add-brick CLI");
if (dict)
diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
index 5436c76bcc..9c61c3f541 100644
--- a/cli/src/cli-cmd-volume.c
+++ b/cli/src/cli-cmd-volume.c
@@ -787,7 +787,7 @@ cli_cmd_volume_add_brick_cbk (struct cli_state *state,
if (!frame)
goto out;
- ret = cli_cmd_volume_add_brick_parse (words, wordcount, &options);
+ ret = cli_cmd_volume_add_brick_parse (words, wordcount, &options, 0);
if (ret) {
cli_usage_out (word->pattern);
parse_error = 1;
@@ -847,12 +847,13 @@ cli_cmd_volume_attach_tier_cbk (struct cli_state *state,
int parse_error = 0;
gf_answer_t answer = GF_ANSWER_NO;
cli_local_t *local = NULL;
+ int type = 0;
frame = create_frame (THIS, THIS->ctx->pool);
if (!frame)
goto out;
- ret = cli_cmd_volume_add_brick_parse (words, wordcount, &options);
+ ret = cli_cmd_volume_add_brick_parse (words, wordcount, &options, &type);
if (ret) {
cli_usage_out (word->pattern);
parse_error = 1;
@@ -872,7 +873,7 @@ cli_cmd_volume_attach_tier_cbk (struct cli_state *state,
if (ret)
goto out;
- ret = dict_set_int32 (options, "type", GF_CLUSTER_TYPE_TIER);
+ ret = dict_set_int32 (options, "type", type);
if (ret)
goto out;
diff --git a/cli/src/cli.h b/cli/src/cli.h
index ed2bc4aba8..60bbcb9f62 100644
--- a/cli/src/cli.h
+++ b/cli/src/cli.h
@@ -249,7 +249,7 @@ cli_cmd_ganesha_parse (struct cli_state *state, const char **words,
int32_t
cli_cmd_volume_add_brick_parse (const char **words, int wordcount,
- dict_t **options);
+ dict_t **options, int *type);
int32_t
cli_cmd_volume_remove_brick_parse (const char **words, int wordcount,
diff --git a/tests/basic/tier/tier.t b/tests/basic/tier/tier.t
index 5884641235..d1e1041f87 100755
--- a/tests/basic/tier/tier.t
+++ b/tests/basic/tier/tier.t
@@ -3,6 +3,12 @@
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc
+LAST_BRICK=3
+CACHE_BRICK_FIRST=4
+CACHE_BRICK_LAST=5
+DEMOTE_TIMEOUT=12
+PROMOTE_TIMEOUT=5
+
function file_on_slow_tier {
s=$(md5sum $1)
for i in `seq 0 $LAST_BRICK`; do
@@ -19,8 +25,9 @@ function file_on_fast_tier {
local ret="1"
s1=$(md5sum $1)
- s2=$(md5sum $B0/${V0}${CACHE_BRICK}/$1)
- if [ -e $B0/${V0}${CACHE_BRICK}/$1 ] && ! [ "$s1" == "$s2" ]; then
+ s2=$(md5sum $B0/${V0}${CACHE_BRICK_FIRST}/$1)
+
+ if [ -e $B0/${V0}${CACHE_BRICK_FIRST}/$1 ] && ! [ "$s1" == "$s2" ]; then
echo "0"
else
echo "1"
@@ -45,17 +52,14 @@ function confirm_vol_stopped {
fi
}
-LAST_BRICK=1
-CACHE_BRICK=2
-DEMOTE_TIMEOUT=12
-PROMOTE_TIMEOUT=5
cleanup
TEST glusterd
TEST pidof glusterd
-TEST $CLI volume create $V0 $H0:$B0/${V0}{0..$LAST_BRICK}
-TEST $CLI volume attach-tier $V0 $H0:$B0/${V0}${CACHE_BRICK}
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..$LAST_BRICK}
+TEST $CLI volume attach-tier $V0 replica 2 $H0:$B0/${V0}$CACHE_BRICK_FIRST $H0:$B0/${V0}$CACHE_BRICK_LAST
TEST $CLI volume start $V0
TEST $CLI volume set $V0 features.ctr-enabled on
TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
@@ -120,7 +124,7 @@ TEST $CLI volume detach-tier $V0
# temporarily comment out
#TEST ! [ -e $M0/d1/data.txt ]
-EXPECT "0" confirm_tier_removed ${V0}${CACHE_BRICK}
+EXPECT "0" confirm_tier_removed ${V0}${CACHE_BRICK_FIRST}
EXPECT_WITHIN $REBALANCE_TIMEOUT "0" confirm_vol_stopped $V0
diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c
index f8b32f1619..fe18d7b91a 100644
--- a/xlators/cluster/dht/src/tier.c
+++ b/xlators/cluster/dht/src/tier.c
@@ -142,12 +142,17 @@ tier_migrate_using_query_file (void *_args)
int total_status = 0;
FILE *queryFILE = NULL;
char *link_str = NULL;
+ xlator_t *src_subvol = NULL;
+ dht_conf_t *conf = NULL;
GF_VALIDATE_OR_GOTO ("tier", query_cbk_args, out);
GF_VALIDATE_OR_GOTO ("tier", query_cbk_args->this, out);
this = query_cbk_args->this;
GF_VALIDATE_OR_GOTO (this->name, query_cbk_args->defrag, out);
GF_VALIDATE_OR_GOTO (this->name, query_cbk_args->queryFILE, out);
+ GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+
+ conf = this->private;
defrag = query_cbk_args->defrag;
@@ -300,8 +305,29 @@ tier_migrate_using_query_file (void *_args)
inode_unref (loc.inode);
loc.inode = linked_inode;
+ /*
+ * Do not promote/demote if file already is where it
+ * should be. This shall become a skipped count.
+ */
+ src_subvol = dht_subvol_get_cached(this, loc.inode);
+
+ if (query_cbk_args->is_promotion &&
+ src_subvol == conf->subvolumes[1]) {
+ per_link_status = -1;
+ goto error;
+ }
+
+ if (!query_cbk_args->is_promotion &&
+ src_subvol == conf->subvolumes[0]) {
+ per_link_status = -1;
+ goto error;
+ }
+
gf_msg (this->name, GF_LOG_INFO, 0,
- DHT_MSG_LOG_TIER_STATUS, "Tier migrate file %s",
+ DHT_MSG_LOG_TIER_STATUS, "Tier %d"
+ " src_subvol %s file %s",
+ query_cbk_args->is_promotion,
+ src_subvol->name,
loc.name);
if (tier_check_same_node (this, &loc, defrag)) {
@@ -587,6 +613,7 @@ tier_demote (void *args)
query_cbk_args.this = demotion_args->this;
query_cbk_args.defrag = demotion_args->defrag;
+ query_cbk_args.is_promotion = 0;
/*Build the query file using bricklist*/
ret = tier_build_migration_qfile(demotion_args, &query_cbk_args,
@@ -622,6 +649,7 @@ static void
query_cbk_args.this = promotion_args->this;
query_cbk_args.defrag = promotion_args->defrag;
+ query_cbk_args.is_promotion = 1;
/*Build the query file using bricklist*/
ret = tier_build_migration_qfile(promotion_args, &query_cbk_args,
@@ -641,7 +669,7 @@ out:
return NULL;
}
-static void
+static int
tier_get_bricklist (xlator_t *xl, dict_t *bricklist)
{
xlator_list_t *child = NULL;
@@ -690,14 +718,17 @@ tier_get_bricklist (xlator_t *xl, dict_t *bricklist)
if (dict_add_dynstr_with_alloc(bricklist, "brick",
db_path))
goto out;
+
+ ret = 0;
+ goto out;
}
}
for (child = xl->children; child; child = child->next) {
- tier_get_bricklist(child->xlator, bricklist);
+ ret = tier_get_bricklist(child->xlator, bricklist);
}
out:
- return;
+ return ret;
}
int
diff --git a/xlators/cluster/dht/src/tier.h b/xlators/cluster/dht/src/tier.h
index 6dc830dabf..7d4ea6b7eb 100644
--- a/xlators/cluster/dht/src/tier.h
+++ b/xlators/cluster/dht/src/tier.h
@@ -50,6 +50,7 @@ typedef struct _query_cbk_args {
xlator_t *this;
gf_defrag_info_t *defrag;
FILE *queryFILE;
+ int is_promotion;
} query_cbk_args_t;
int
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index b32dbb8913..165622f87e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -465,6 +465,9 @@ __glusterd_handle_add_brick (rpcsvc_request_t *req)
ret = -1;
goto out;
}
+
+ ret = dict_get_int32 (dict, "type", &type);
+
goto brick_val;
}
@@ -1835,6 +1838,7 @@ glusterd_op_perform_attach_tier (dict_t *dict,
{
int ret = 0;
int replica_count = 0;
+ int type = 0;
/*
* Store the new (cold) tier's structure until the graph is generated.
@@ -1853,6 +1857,9 @@ glusterd_op_perform_attach_tier (dict_t *dict,
else
volinfo->tier_info.hot_replica_count = 1;
volinfo->tier_info.hot_brick_count = count;
+ ret = dict_get_int32 (dict, "type", &type);
+ volinfo->tier_info.hot_type = type;
+ ret = dict_set_int32 (dict, "type", GF_CLUSTER_TYPE_TIER);
return ret;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index 8a61da2e8e..bbac2a2283 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -3107,11 +3107,13 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,
/* All other cases, it will have one or the other cluster type */
switch (volinfo->type) {
case GF_CLUSTER_TYPE_REPLICATE:
- clusters = volgen_link_bricks_from_list_tail (graph, volinfo,
- replicate_args[0],
- replicate_args[1],
- volinfo->brick_count,
- volinfo->replica_count);
+ clusters = volgen_link_bricks_from_list_tail
+ (graph, volinfo,
+ replicate_args[0],
+ replicate_args[1],
+ volinfo->brick_count,
+ volinfo->replica_count);
+
if (clusters < 0)
goto out;
break;
@@ -3284,12 +3286,12 @@ volume_volgen_graph_build_clusters_tier (volgen_graph_t *graph,
volinfo->type = volinfo->tier_info.cold_type;
sprintf (volinfo->volname, "%s-cold", st_volname);
- ret = volume_volgen_graph_build_clusters (graph, volinfo, _gf_false);
+ ret = volume_volgen_graph_build_clusters (graph, volinfo, is_quotad);
if (ret)
goto out;
cxl = first_of(graph);
- volinfo->type = GF_CLUSTER_TYPE_TIER;
+ volinfo->type = volinfo->tier_info.hot_type;
volinfo->brick_count = volinfo->tier_info.hot_brick_count;
volinfo->replica_count = volinfo->tier_info.hot_replica_count;
volinfo->dist_leaf_count = glusterd_get_dist_leaf_count(volinfo);
@@ -3297,21 +3299,34 @@ volume_volgen_graph_build_clusters_tier (volgen_graph_t *graph,
sprintf (volinfo->volname, "%s-hot", st_volname);
- if (volinfo->dist_leaf_count == 1) {
- dist_count = volinfo->brick_count / volinfo->dist_leaf_count;
- ret = volgen_link_bricks_from_list_head (graph, volinfo,
- "cluster/distribute",
- "%s-dht",
- dist_count,
- dist_count);
+ dist_count = volinfo->brick_count / volinfo->dist_leaf_count;
+
+ if (volinfo->dist_leaf_count != 1) {
+ ret = volgen_link_bricks_from_list_head
+ (graph, volinfo,
+ "cluster/replicate",
+ "%s-replicate-%d",
+ volinfo->brick_count,
+ volinfo->replica_count);
+ if (ret != -1)
+ volgen_link_bricks_from_list_tail (graph, volinfo,
+ "cluster/distribute",
+ "%s-dht",
+ dist_count,
+ dist_count);
} else {
- ret = volume_volgen_graph_build_clusters (graph,
- volinfo,
- _gf_false);
+ ret = volgen_link_bricks_from_list_head (graph, volinfo,
+ "cluster/distribute",
+ "%s-dht",
+ dist_count,
+ dist_count);
}
+ if (ret == -1)
+ goto out;
hxl = first_of(graph);
+ volinfo->type = GF_CLUSTER_TYPE_TIER;
xl = volgen_graph_add_nolink (graph, "cluster/tier", "%s",
"tier-dht", 0);
gf_asprintf(&rule, "%s-hot-dht", st_volname);