From a60fc2ddc03134fb23c5ed5c0bcb195e1649416b Mon Sep 17 00:00:00 2001 From: Sanju Rakonde Date: Wed, 21 Feb 2018 12:46:25 +0530 Subject: glusterd: handling brick termination in brick-mux Problem: There's a race between the last glusterfs_handle_terminate() response sent to glusterd and the kill that happens immediately if the terminated brick is the last brick. Solution: When it is a last brick for the brick process, instead of glusterfsd killing itself, glusterd will kill the process in case of brick multiplexing. And also changing gf_attach utility accordingly. Change-Id: I386c19ca592536daa71294a13d9fc89a26d7e8c0 fixes: bz#1545048 BUG: 1545048 Signed-off-by: Sanju Rakonde --- glusterfsd/src/gf_attach.c | 41 +++++++++++++++++++++++++++++++++++++--- glusterfsd/src/glusterfsd-mgmt.c | 33 +++++++++++++++++++++----------- 2 files changed, 60 insertions(+), 14 deletions(-) (limited to 'glusterfsd') diff --git a/glusterfsd/src/gf_attach.c b/glusterfsd/src/gf_attach.c index 3f248292dd..0eb4868263 100644 --- a/glusterfsd/src/gf_attach.c +++ b/glusterfsd/src/gf_attach.c @@ -11,6 +11,9 @@ #include #include #include +#include +#include +#include //#include "config.h" #include "glusterfs.h" @@ -23,6 +26,7 @@ int done = 0; int rpc_status; +glfs_t *fs; struct rpc_clnt_procedure gf_attach_actors[GLUSTERD_BRICK_MAXVALUE] = { [GLUSTERD_BRICK_NULL] = {"NULL", NULL }, @@ -71,11 +75,43 @@ my_notify (struct rpc_clnt *rpc, void *mydata, } int32_t -my_callback (struct rpc_req *req, struct iovec *iov, int count, void *frame) +my_callback (struct rpc_req *req, struct iovec *iov, int count, void *v_frame) { + gd1_mgmt_brick_op_rsp rsp; + dict_t *dict = NULL; + pid_t pid = -1; + int ret = -1; + xlator_t *this = NULL; + + this = fs->ctx->master; + memset (&rsp, 0, sizeof (rsp)); + + ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp); + + if (ret < 0) { + fprintf (stderr, "xdr decoding failed\n"); + goto out; + } + GF_PROTOCOL_DICT_UNSERIALIZE (this, dict, + 
(rsp.output.output_val), + (rsp.output.output_len), + ret, rsp.op_errno, out); + if (dict) { + if (dict_get_int32 (dict, "last_brick_terminated", &pid) == 0) { + int status = 0; + + gf_log ("gf_attach", GF_LOG_INFO, "Killing %d", pid); + kill (pid, SIGTERM); + waitpid (pid, &status, 0); + } + dict_unref (dict); + } + rpc_status = req->rpc_status; done = 1; - return 0; + ret = 0; +out: + return ret; } /* copied from gd_syncop_submit_request */ @@ -170,7 +206,6 @@ usage (char *prog) int main (int argc, char *argv[]) { - glfs_t *fs; struct rpc_clnt *rpc; dict_t *options; int ret; diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c index d2b39494e5..c4df275077 100644 --- a/glusterfsd/src/glusterfsd-mgmt.c +++ b/glusterfsd/src/glusterfsd-mgmt.c @@ -159,21 +159,31 @@ out: } int -glusterfs_terminate_response_send (rpcsvc_request_t *req, int op_ret) +glusterfs_terminate_response_send (rpcsvc_request_t *req, int op_ret, + gf_boolean_t last_brick) { gd1_mgmt_brick_op_rsp rsp = {0,}; dict_t *dict = NULL; - int ret = 0; + int ret = -1; rsp.op_ret = op_ret; rsp.op_errno = 0; rsp.op_errstr = ""; dict = dict_new (); + if (dict) { + /* Setting the last_brick_terminated key in dictionary is + * required for the standalone gf_attach utility to work. + * gf_attach utility will receive this dictionary and kill + * the process. 
+ */ + if (last_brick) { + ret = dict_set_int32 (dict, "last_brick_terminated", + getpid()); + } ret = dict_allocate_and_serialize (dict, &rsp.output.output_val, &rsp.output.output_len); - + } if (ret == 0) ret = glusterfs_submit_reply (req, &rsp, NULL, 0, NULL, @@ -262,6 +272,7 @@ glusterfs_handle_terminate (rpcsvc_request_t *req) xlator_t *victim = NULL; xlator_list_t **trav_p = NULL; gf_boolean_t lockflag = _gf_false; + gf_boolean_t last_brick = _gf_false; ret = xdr_to_generic (req->msg[0], &xlator_req, (xdrproc_t)xdr_gd1_mgmt_brick_op_req); @@ -294,17 +305,16 @@ glusterfs_handle_terminate (rpcsvc_request_t *req) * make sure it's down and if it's already down that's * good enough. */ - glusterfs_terminate_response_send (req, 0); + glusterfs_terminate_response_send (req, 0, last_brick); goto err; } - glusterfs_terminate_response_send (req, 0); if ((trav_p == &top->children) && !(*trav_p)->next) { - gf_log (THIS->name, GF_LOG_INFO, - "terminating after loss of last child %s", - xlator_req.name); - rpc_clnt_mgmt_pmap_signout (glusterfsd_ctx, xlator_req.name); - kill (getpid(), SIGTERM); + last_brick = _gf_true; + glusterfs_terminate_response_send (req, 0, last_brick); + gf_log (THIS->name, GF_LOG_INFO, "This is last brick of process." + "glusterD will kill the process and takes care of " + "removal of entries from port map register"); } else { /* * This is terribly unsafe without quiescing or shutting @@ -313,6 +323,7 @@ glusterfs_handle_terminate (rpcsvc_request_t *req) * * TBD: finish implementing this "detach" code properly */ + glusterfs_terminate_response_send (req, 0, last_brick); UNLOCK (&ctx->volfile_lock); lockflag = _gf_true; gf_log (THIS->name, GF_LOG_INFO, "detaching not-only" -- cgit