From dba55ae364a2772904bb68a6bd0ea87289ee1470 Mon Sep 17 00:00:00 2001
From: Mohit Agrawal
Date: Thu, 25 May 2017 21:43:42 +0530
Subject: glusterfs: Not able to mount a running volume after enabling brick mux and stopping any volume

Problem: With brick multiplexing enabled, if any volume goes down and a mount
         of a still-running volume is then attempted, the mount command hangs.

Solution: With brick multiplexing enabled, the server shares a single data
          structure (server_conf) for all associated subvolumes. When any
          subvolume goes down in an ungraceful manner (for example, its brick
          directory is removed), the posix xlator sends a GF_EVENT_CHILD_DOWN
          event to its parent xlators and the server's notify function sets
          child_up to false in server_conf. When a client then tries to
          communicate with the server through a mount, it checks
          conf->child_up, finds it FALSE, and fails with the message
          "translator are not yet ready".

          This patch changes server_conf to store the child_up status per
          xlator. Another important correction in this patch is to clean up
          the threads of the server-side xlators after a volume is stopped.

BUG: 1453977
Change-Id: Ic54da3f01881b7c9429ce92cc569236eb1d43e0d
Signed-off-by: Mohit Agrawal
Reviewed-on: https://review.gluster.org/17356
Smoke: Gluster Build System
NetBSD-regression: NetBSD Build System
Reviewed-by: Raghavendra Talur
CentOS-regression: Gluster Build System
Reviewed-by: Jeff Darcy
---
 glusterfsd/src/glusterfsd-mgmt.c | 113 ++++++++++++++++++++-------------------
 1 file changed, 58 insertions(+), 55 deletions(-)

(limited to 'glusterfsd')

diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
index 8ede110121..c17bf3bb6f 100644
--- a/glusterfsd/src/glusterfsd-mgmt.c
+++ b/glusterfsd/src/glusterfsd-mgmt.c
@@ -198,10 +198,11 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)
 {
         gd1_mgmt_brick_op_req   xlator_req = {0,};
         ssize_t                 ret;
-        xlator_t                *top = NULL;
-        xlator_t                *victim = NULL;
-        glusterfs_ctx_t         *ctx = NULL;
-        xlator_list_t           **trav_p;
+        glusterfs_ctx_t         *ctx = NULL;
+        xlator_t                *top = NULL;
+        xlator_t                *victim = NULL;
+        xlator_list_t           **trav_p = NULL;
+        gf_boolean_t            lockflag = _gf_false;
 
         ret = xdr_to_generic (req->msg[0], &xlator_req,
                               (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
@@ -214,57 +215,54 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)
         LOCK (&ctx->volfile_lock);
         {
                 /* Find the xlator_list_t that points to our victim. */
-                top = glusterfsd_ctx->active->first;
-                for (trav_p = &top->children; *trav_p;
-                                               trav_p = &(*trav_p)->next) {
-                        victim = (*trav_p)->xlator;
-                        if (strcmp (victim->name, xlator_req.name) == 0) {
-                                break;
+                if (glusterfsd_ctx->active) {
+                        top = glusterfsd_ctx->active->first;
+                        for (trav_p = &top->children; *trav_p;
+                             trav_p = &(*trav_p)->next) {
+                                victim = (*trav_p)->xlator;
+                                if (strcmp (victim->name, xlator_req.name) == 0) {
+                                        break;
+                                }
                         }
                 }
-
-                if (!*trav_p) {
-                        gf_log (THIS->name, GF_LOG_ERROR,
-                                "can't terminate %s - not found",
-                                xlator_req.name);
-                        /*
-                         * Used to be -ENOENT. However, the caller asked us to
-                         * make sure it's down and if it's already down that's
-                         * good enough.
-                         */
-                        glusterfs_terminate_response_send (req, 0);
-                        goto err;
-                }
-
+        }
+        if (!*trav_p) {
+                gf_log (THIS->name, GF_LOG_ERROR,
+                        "can't terminate %s - not found",
+                        xlator_req.name);
+                /*
+                 * Used to be -ENOENT. However, the caller asked us to
+                 * make sure it's down and if it's already down that's
+                 * good enough.
+                 */
                 glusterfs_terminate_response_send (req, 0);
-                if ((trav_p == &top->children) && !(*trav_p)->next) {
-                        gf_log (THIS->name, GF_LOG_INFO,
-                                "terminating after loss of last child %s",
-                                xlator_req.name);
-                        glusterfs_mgmt_pmap_signout (glusterfsd_ctx,
-                                                     xlator_req.name);
-                        kill (getpid(), SIGTERM);
-                } else {
-                        /*
-                         * This is terribly unsafe without quiescing or shutting
-                         * things down properly but it gets us to the point
-                         * where we can test other stuff.
-                         *
-                         * TBD: finish implementing this "detach" code properly
-                         */
-                        gf_log (THIS->name, GF_LOG_INFO, "detaching not-only"
-                                " child %s", xlator_req.name);
-                        top->notify (top, GF_EVENT_TRANSPORT_CLEANUP, victim);
-                        glusterfs_mgmt_pmap_signout (glusterfsd_ctx,
-                                                     xlator_req.name);
-
-                        *trav_p = (*trav_p)->next;
-                        glusterfs_autoscale_threads (THIS->ctx, -1);
-                }
+                goto err;
+        }
+        glusterfs_terminate_response_send (req, 0);
+        if ((trav_p == &top->children) && !(*trav_p)->next) {
+                gf_log (THIS->name, GF_LOG_INFO,
+                        "terminating after loss of last child %s",
+                        xlator_req.name);
+                glusterfs_mgmt_pmap_signout (glusterfsd_ctx, xlator_req.name);
+                kill (getpid(), SIGTERM);
+        } else {
+                /*
+                 * This is terribly unsafe without quiescing or shutting
+                 * things down properly but it gets us to the point
+                 * where we can test other stuff.
+                 *
+                 * TBD: finish implementing this "detach" code properly
+                 */
+                UNLOCK (&ctx->volfile_lock);
+                lockflag = _gf_true;
+                gf_log (THIS->name, GF_LOG_INFO, "detaching not-only"
+                        " child %s", xlator_req.name);
+                top->notify (top, GF_EVENT_CLEANUP, victim);
         }
 err:
-        UNLOCK (&ctx->volfile_lock);
+        if (!lockflag)
+                UNLOCK (&ctx->volfile_lock);
         free (xlator_req.name);
         xlator_req.name = NULL;
         return 0;
@@ -838,6 +836,7 @@ glusterfs_handle_attach (rpcsvc_request_t *req)
         int32_t               ret          = -1;
         gd1_mgmt_brick_op_req xlator_req   = {0,};
         xlator_t              *this        = NULL;
+        xlator_t              *nextchild   = NULL;
         glusterfs_graph_t     *newgraph    = NULL;
         glusterfs_ctx_t       *ctx         = NULL;
 
@@ -862,15 +861,19 @@ glusterfs_handle_attach (rpcsvc_request_t *req)
                 gf_log (this->name, GF_LOG_INFO,
                         "got attach for %s", xlator_req.name);
                 ret = glusterfs_graph_attach (this->ctx->active,
-                                              xlator_req.name,
-                                              &newgraph);
-                if (ret == 0) {
-                        ret = glusterfs_graph_parent_up (newgraph);
+                                              xlator_req.name, &newgraph);
+                if (!ret && (newgraph && newgraph->first)) {
+                        nextchild = newgraph->first;
+                        ret = xlator_notify (nextchild,
+                                             GF_EVENT_PARENT_UP,
+                                             nextchild);
                         if (ret) {
-                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                gf_msg (this->name, GF_LOG_ERROR,
+                                        0,
                                         LG_MSG_EVENT_NOTIFY_FAILED,
                                         "Parent up notification "
-                                        "failed");
+                                        "failed for %s ",
+                                        nextchild->name);
                                 goto out;
                         }
                         glusterfs_autoscale_threads (this->ctx, 1);
-- cgit
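
Note: the per-xlator child_up bookkeeping described in the commit message lives
in the protocol/server xlator and is therefore not part of the diff shown above
(the view is limited to 'glusterfsd'). The sketch below only illustrates the
idea of tracking an up/down flag per child instead of one shared flag for the
whole multiplexed server; the type and function names (child_status,
set_child_status, child_is_up, add_child) are hypothetical and do not appear in
the actual GlusterFS code.

/*
 * Illustrative sketch only; not the protocol/server implementation.
 * Shows per-child up/down state instead of a single shared child_up flag.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* One entry per brick (child xlator) attached to the multiplexed server. */
struct child_status {
        char                *name;       /* child xlator name */
        bool                 child_up;   /* CHILD_UP/CHILD_DOWN for this child only */
        struct child_status *next;
};

/* Hypothetical stand-in for the server's private data (server_conf). */
struct server_conf {
        struct child_status *children;   /* was: a single boolean child_up */
};

static struct child_status *
add_child (struct server_conf *conf, const char *name)
{
        struct child_status *cs = calloc (1, sizeof (*cs));

        cs->name = strdup (name);
        cs->child_up = true;
        cs->next = conf->children;
        conf->children = cs;
        return cs;
}

/* A CHILD_DOWN/CHILD_UP notification updates only the child that reported it. */
static void
set_child_status (struct server_conf *conf, const char *name, bool up)
{
        struct child_status *cs;

        for (cs = conf->children; cs; cs = cs->next) {
                if (strcmp (cs->name, name) == 0) {
                        cs->child_up = up;
                        return;
                }
        }
}

/* A mount of volume X only checks X's own child, not a global flag. */
static bool
child_is_up (struct server_conf *conf, const char *name)
{
        struct child_status *cs;

        for (cs = conf->children; cs; cs = cs->next)
                if (strcmp (cs->name, name) == 0)
                        return cs->child_up;
        return false;
}

int
main (void)
{
        struct server_conf conf = { 0 };

        add_child (&conf, "vol1");
        add_child (&conf, "vol2");

        /* vol2's brick dies ungracefully: only vol2 is marked down. */
        set_child_status (&conf, "vol2", false);

        printf ("vol1 up: %d, vol2 up: %d\n",
                child_is_up (&conf, "vol1"), child_is_up (&conf, "vol2"));
        return 0;
}

With a single shared flag, one brick going down makes every mount of every
multiplexed volume fail the readiness check; with per-child state, only the
volume whose brick died is affected, which is the behaviour described in the
commit message above.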