From d1e945fdd994b40adc1ae5325bd89cbac355e68f Mon Sep 17 00:00:00 2001 From: Jakub Hrozek Date: Wed, 12 Sep 2012 19:23:48 +0200 Subject: FO: Check server validity before setting status The list of resolved servers is allocated on the back end context and kept in the fo_service structure. However, a single request often resolves a server and keeps a pointer until the end of a request and only then gives feedback about the server based on the request result. This presents a big race condition in case the SRV resolution is used. When there are requests coming in in parallel, it is possible that an incoming request will invalidate a server until another request that holds a pointer to the original server is able to give a feedback. This patch simply checks if a server is in the list of servers maintained by a service before reading its status. https://fedorahosted.org/sssd/ticket/1364 --- src/providers/data_provider_fo.c | 27 +++++++++++----------- src/providers/dp_backend.h | 1 + src/providers/fail_over.c | 13 ++++++----- src/providers/fail_over.h | 2 +- src/providers/krb5/krb5_auth.c | 14 +++++++----- src/providers/ldap/ldap_auth.c | 4 +++- src/providers/ldap/sdap_async_connection.c | 36 +++++++++++++++++++++++------- 7 files changed, 63 insertions(+), 34 deletions(-) diff --git a/src/providers/data_provider_fo.c b/src/providers/data_provider_fo.c index ada41a3ac..f4627f32e 100644 --- a/src/providers/data_provider_fo.c +++ b/src/providers/data_provider_fo.c @@ -547,32 +547,31 @@ void reset_fo(struct be_ctx *be_ctx) } void be_fo_set_port_status(struct be_ctx *ctx, + const char *service_name, struct fo_server *server, enum port_status status) { - struct be_svc_data *svc; - struct fo_service *fsvc; - - fo_set_port_status(server, status); + struct be_svc_data *be_svc; - fsvc = fo_get_server_service(server); - if (!fsvc) { - DEBUG(SSSDBG_OP_FAILURE, ("BUG: No service associated with server\n")); + be_svc = be_fo_find_svc_data(ctx, service_name); + if (be_svc == NULL) { + DEBUG(SSSDBG_OP_FAILURE, + ("No service associated with name %s\n", service_name)); return; } - DLIST_FOR_EACH(svc, ctx->be_fo->svcs) { - if (svc->fo_service == fsvc) break; - } - - if (!svc) { - DEBUG(SSSDBG_OP_FAILURE, ("BUG: Unknown service\n")); + if (!fo_svc_has_server(be_svc->fo_service, server)) { + DEBUG(SSSDBG_OP_FAILURE, + ("The server %p is not valid anymore, cannot set its status\n")); return; } + /* Now we know that the server is valid */ + fo_set_port_status(server, status); + if (status == PORT_WORKING) { /* We were successful in connecting to the server. Cycle through all * available servers next time */ - svc->first_resolved = NULL; + be_svc->first_resolved = NULL; } } diff --git a/src/providers/dp_backend.h b/src/providers/dp_backend.h index 6e98e7ef3..a1f64a28c 100644 --- a/src/providers/dp_backend.h +++ b/src/providers/dp_backend.h @@ -212,6 +212,7 @@ struct tevent_req *be_resolve_server_send(TALLOC_CTX *memctx, int be_resolve_server_recv(struct tevent_req *req, struct fo_server **srv); void be_fo_set_port_status(struct be_ctx *ctx, + const char *service_name, struct fo_server *server, enum port_status status); diff --git a/src/providers/fail_over.c b/src/providers/fail_over.c index deb7d394c..8925e3586 100644 --- a/src/providers/fail_over.c +++ b/src/providers/fail_over.c @@ -1476,10 +1476,13 @@ void fo_reset_services(struct fo_ctx *fo_ctx) } } -struct fo_service * -fo_get_server_service(struct fo_server *server) +bool fo_svc_has_server(struct fo_service *service, struct fo_server *server) { - if (!server) return NULL; - return server->service; -} + struct fo_server *srv; + + DLIST_FOR_EACH(srv, service->server_list) { + if (srv == server) return true; + } + return false; +} diff --git a/src/providers/fail_over.h b/src/providers/fail_over.h index 8fbbe251b..2b0bb92d9 100644 --- a/src/providers/fail_over.h +++ b/src/providers/fail_over.h @@ -186,6 +186,6 @@ int fo_is_srv_lookup(struct fo_server *s); void fo_reset_services(struct fo_ctx *fo_ctx); -struct fo_service *fo_get_server_service(struct fo_server *server); +bool fo_svc_has_server(struct fo_service *service, struct fo_server *server); #endif /* !__FAIL_OVER_H__ */ diff --git a/src/providers/krb5/krb5_auth.c b/src/providers/krb5/krb5_auth.c index 0306426cc..7810c250f 100644 --- a/src/providers/krb5/krb5_auth.c +++ b/src/providers/krb5/krb5_auth.c @@ -880,6 +880,7 @@ static void krb5_child_done(struct tevent_req *subreq) /* ..which is unreachable by now.. */ if (msg_status == PAM_AUTHTOK_LOCK_BUSY) { be_fo_set_port_status(state->be_ctx, + state->krb5_ctx->service->name, kr->kpasswd_srv, PORT_NOT_WORKING); /* ..try to resolve next kpasswd server */ if (krb5_next_kpasswd(req) == NULL) { @@ -888,6 +889,7 @@ static void krb5_child_done(struct tevent_req *subreq) return; } else { be_fo_set_port_status(state->be_ctx, + state->krb5_ctx->service->name, kr->kpasswd_srv, PORT_WORKING); } } @@ -898,7 +900,8 @@ static void krb5_child_done(struct tevent_req *subreq) if (msg_status == PAM_AUTHINFO_UNAVAIL || (kr->kpasswd_srv == NULL && msg_status == PAM_AUTHTOK_LOCK_BUSY)) { if (kr->srv != NULL) { - be_fo_set_port_status(state->be_ctx, kr->srv, PORT_NOT_WORKING); + be_fo_set_port_status(state->be_ctx, state->krb5_ctx->service->name, + kr->srv, PORT_NOT_WORKING); /* ..try to resolve next KDC */ if (krb5_next_kdc(req) == NULL) { tevent_req_error(req, ENOMEM); @@ -906,7 +909,8 @@ static void krb5_child_done(struct tevent_req *subreq) return; } } else if (kr->srv != NULL) { - be_fo_set_port_status(state->be_ctx, kr->srv, PORT_WORKING); + be_fo_set_port_status(state->be_ctx, state->krb5_ctx->service->name, + kr->srv, PORT_WORKING); } /* Now only a successful authentication or password change is left. @@ -969,19 +973,19 @@ static struct tevent_req *krb5_next_server(struct tevent_req *req) switch (pd->cmd) { case SSS_PAM_AUTHENTICATE: case SSS_CMD_RENEW: - be_fo_set_port_status(state->be_ctx, + be_fo_set_port_status(state->be_ctx, state->krb5_ctx->service->name, state->kr->srv, PORT_NOT_WORKING); next_req = krb5_next_kdc(req); break; case SSS_PAM_CHAUTHTOK: case SSS_PAM_CHAUTHTOK_PRELIM: if (state->kr->kpasswd_srv) { - be_fo_set_port_status(state->be_ctx, + be_fo_set_port_status(state->be_ctx, state->krb5_ctx->service->name, state->kr->kpasswd_srv, PORT_NOT_WORKING); next_req = krb5_next_kpasswd(req); break; } else { - be_fo_set_port_status(state->be_ctx, + be_fo_set_port_status(state->be_ctx, state->krb5_ctx->service->name, state->kr->srv, PORT_NOT_WORKING); next_req = krb5_next_kdc(req); break; diff --git a/src/providers/ldap/ldap_auth.c b/src/providers/ldap/ldap_auth.c index 734249ced..08502c147 100644 --- a/src/providers/ldap/ldap_auth.c +++ b/src/providers/ldap/ldap_auth.c @@ -590,6 +590,7 @@ static void auth_connect_done(struct tevent_req *subreq) if (state->srv) { /* mark this server as bad if connection failed */ be_fo_set_port_status(state->ctx->be, + state->sdap_service->name, state->srv, PORT_NOT_WORKING); } if (ret == ETIMEDOUT) { @@ -602,7 +603,8 @@ static void auth_connect_done(struct tevent_req *subreq) tevent_req_error(req, ret); return; } else if (state->srv) { - be_fo_set_port_status(state->ctx->be, state->srv, PORT_WORKING); + be_fo_set_port_status(state->ctx->be, state->sdap_service->name, + state->srv, PORT_WORKING); } ret = get_user_dn(state, state->ctx->be->sysdb, state->ctx->opts, diff --git a/src/providers/ldap/sdap_async_connection.c b/src/providers/ldap/sdap_async_connection.c index 9a56ce1ef..26f724754 100644 --- a/src/providers/ldap/sdap_async_connection.c +++ b/src/providers/ldap/sdap_async_connection.c @@ -942,7 +942,20 @@ static void sdap_kinit_done(struct tevent_req *subreq) ret = sdap_get_tgt_recv(subreq, state, &result, &kerr, &ccname, &expire_time); talloc_zfree(subreq); - if (ret != EOK) { + if (ret == ETIMEDOUT) { + /* The child didn't even respond. Perhaps the KDC is too busy, + * retry with another KDC */ + DEBUG(SSSDBG_MINOR_FAILURE, + ("Communication with KDC timed out, trying the next one\n")); + be_fo_set_port_status(state->be, state->krb_service_name, + state->kdc_srv, PORT_NOT_WORKING); + nextreq = sdap_kinit_next_kdc(req); + if (!nextreq) { + tevent_req_error(req, ENOMEM); + } + return; + } else if (ret != EOK) { + /* A severe error while executing the child. Abort the operation. */ state->result = SDAP_AUTH_FAILED; DEBUG(1, ("child failed (%d [%s])\n", ret, strerror(ret))); tevent_req_error(req, ret); @@ -963,7 +976,8 @@ static void sdap_kinit_done(struct tevent_req *subreq) return; } else { if (kerr == KRB5_KDC_UNREACH) { - be_fo_set_port_status(state->be, state->kdc_srv, PORT_NOT_WORKING); + be_fo_set_port_status(state->be, state->krb_service_name, + state->kdc_srv, PORT_NOT_WORKING); nextreq = sdap_kinit_next_kdc(req); if (!nextreq) { tevent_req_error(req, ENOMEM); @@ -1294,7 +1308,8 @@ static void sdap_cli_connect_done(struct tevent_req *subreq) talloc_zfree(subreq); if (ret) { /* retry another server */ - be_fo_set_port_status(state->be, state->srv, PORT_NOT_WORKING); + be_fo_set_port_status(state->be, state->service->name, + state->srv, PORT_NOT_WORKING); ret = sdap_cli_resolve_next(req); if (ret != EOK) { tevent_req_error(req, ret); @@ -1367,7 +1382,8 @@ static void sdap_cli_rootdse_done(struct tevent_req *subreq) talloc_zfree(subreq); if (ret) { if (ret == ETIMEDOUT) { /* retry another server */ - be_fo_set_port_status(state->be, state->srv, PORT_NOT_WORKING); + be_fo_set_port_status(state->be, state->service->name, + state->srv, PORT_NOT_WORKING); ret = sdap_cli_resolve_next(req); if (ret != EOK) { tevent_req_error(req, ret); @@ -1495,7 +1511,8 @@ static void sdap_cli_kinit_done(struct tevent_req *subreq) talloc_zfree(subreq); if (ret) { if (ret == ETIMEDOUT) { /* child timed out, retry another server */ - be_fo_set_port_status(state->be, state->srv, PORT_NOT_WORKING); + be_fo_set_port_status(state->be, state->service->name, + state->srv, PORT_NOT_WORKING); ret = sdap_cli_resolve_next(req); if (ret != EOK) { tevent_req_error(req, ret); @@ -1609,7 +1626,8 @@ static void sdap_cli_rootdse_auth_done(struct tevent_req *subreq) if (ret == ETIMEDOUT) { /* The server we authenticated against went down. Retry another * one */ - be_fo_set_port_status(state->be, state->srv, PORT_NOT_WORKING); + be_fo_set_port_status(state->be, state->service->name, + state->srv, PORT_NOT_WORKING); ret = sdap_cli_resolve_next(req); if (ret != EOK) { tevent_req_error(req, ret); @@ -1657,7 +1675,8 @@ int sdap_cli_connect_recv(struct tevent_req *req, if (tevent_req_is_error(req, &tstate, &err)) { /* mark the server as bad if connection failed */ if (state->srv) { - be_fo_set_port_status(state->be, state->srv, PORT_NOT_WORKING); + be_fo_set_port_status(state->be, state->service->name, + state->srv, PORT_NOT_WORKING); } else { if (can_retry) { *can_retry = false; @@ -1669,7 +1688,8 @@ int sdap_cli_connect_recv(struct tevent_req *req, } return EIO; } else if (state->srv) { - be_fo_set_port_status(state->be, state->srv, PORT_WORKING); + be_fo_set_port_status(state->be, state->service->name, + state->srv, PORT_WORKING); } if (gsh) { -- cgit