diff options
-rw-r--r-- | src/providers/data_provider_fo.c | 215 | ||||
-rw-r--r-- | src/providers/dp_backend.h | 3 | ||||
-rw-r--r-- | src/providers/fail_over.c | 75 | ||||
-rw-r--r-- | src/providers/fail_over.h | 7 | ||||
-rw-r--r-- | src/providers/ipa/ipa_common.c | 2 | ||||
-rw-r--r-- | src/providers/krb5/krb5_common.c | 2 | ||||
-rw-r--r-- | src/providers/ldap/ldap_common.c | 4 | ||||
-rw-r--r-- | src/tests/fail_over-tests.c | 13 |
8 files changed, 264 insertions, 57 deletions
diff --git a/src/providers/data_provider_fo.c b/src/providers/data_provider_fo.c index 51d6ae211..1c03e31c2 100644 --- a/src/providers/data_provider_fo.c +++ b/src/providers/data_provider_fo.c @@ -54,6 +54,7 @@ struct be_failover_ctx { struct resolv_ctx *resolv; struct be_svc_data *svcs; + struct tevent_timer *primary_server_handler; }; static const char *proto_table[] = { FO_PROTO_TCP, FO_PROTO_UDP, NULL }; @@ -315,7 +316,8 @@ int be_fo_get_server_count(struct be_ctx *ctx, const char *service_name) } int be_fo_add_server(struct be_ctx *ctx, const char *service_name, - const char *server, int port, void *user_data) + const char *server, int port, void *user_data, + bool primary) { struct be_svc_data *svc; int ret; @@ -325,7 +327,8 @@ int be_fo_add_server(struct be_ctx *ctx, const char *service_name, return ENOENT; } - ret = fo_add_server(svc->fo_service, server, port, user_data); + ret = fo_add_server(svc->fo_service, server, port, + user_data, primary); if (ret && ret != EEXIST) { DEBUG(1, ("Failed to add server to failover service\n")); return ret; @@ -345,6 +348,138 @@ struct be_resolve_server_state { bool first_try; }; +struct be_primary_server_ctx { + struct be_ctx *bctx; + struct tevent_context *ev; + + struct be_svc_data *svc; + unsigned long timeout; + + int attempts; +}; + +errno_t be_resolve_server_process(struct tevent_req *subreq, + struct be_resolve_server_state *state, + struct tevent_req **new_subreq); +static void be_primary_server_done(struct tevent_req *subreq); +static errno_t +be_primary_server_timeout_activate(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct be_ctx *bctx, + struct be_svc_data *svc, + const unsigned long timeout_seconds); + +static void +be_primary_server_timeout(struct tevent_context *ev, + struct tevent_timer *te, + struct timeval tv, void *pvt) +{ + struct be_primary_server_ctx *ctx = talloc_get_type(pvt, struct be_primary_server_ctx); + struct tevent_req *subreq; + + ctx->bctx->be_fo->primary_server_handler = NULL; + + DEBUG(SSSDBG_TRACE_FUNC, ("Looking for primary server!\n")); + subreq = fo_resolve_service_send(ctx->bctx, ctx->ev, + ctx->bctx->be_fo->resolv, + ctx->bctx->be_fo->fo_ctx, + ctx->svc->fo_service); + if (subreq == NULL) { + return; + } + tevent_req_set_callback(subreq, be_primary_server_done, ctx); +} + +static void be_primary_server_done(struct tevent_req *subreq) +{ + errno_t ret; + struct be_primary_server_ctx *ctx; + struct be_resolve_server_state *resolve_state; + struct tevent_req *new_subreq; + + ctx = tevent_req_callback_data(subreq, struct be_primary_server_ctx); + + resolve_state = talloc_zero(ctx->bctx, struct be_resolve_server_state); + if (resolve_state == NULL) { + DEBUG(SSSDBG_CRIT_FAILURE, ("talloc_zero() failed\n")); + return; + } + + resolve_state->attempts = ctx->attempts; + resolve_state->ctx = ctx->bctx; + resolve_state->ev = ctx->ev; + resolve_state->first_try = true; + resolve_state->srv = NULL; + resolve_state->svc = ctx->svc; + + ret = be_resolve_server_process(subreq, resolve_state, &new_subreq); + talloc_free(subreq); + if (ret == EAGAIN) { + ctx->attempts++; + tevent_req_set_callback(new_subreq, be_primary_server_done, ctx); + return; + } else if (ret == EIO || (ret == EOK && + !fo_is_server_primary(resolve_state->srv))) { + + /* Schedule another lookup + * (either no server could be found or it was not primary) + */ + ret = be_primary_server_timeout_activate(ctx->bctx, ctx->ev, ctx->bctx, + ctx->svc, ctx->timeout); + if (ret != EOK) { + DEBUG(SSSDBG_MINOR_FAILURE, ("Could not schedule primary server lookup\n")); + } + } else if (ret == EOK) { + be_run_reconnect_cb(ctx->bctx); + } + talloc_zfree(ctx); + + /* If an error occurred just end the routine */ +} + +static errno_t +be_primary_server_timeout_activate(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct be_ctx *bctx, + struct be_svc_data *svc, + const unsigned long timeout_seconds) +{ + struct timeval tv; + struct be_primary_server_ctx *ctx; + struct be_failover_ctx *fo_ctx = bctx->be_fo; + + if (fo_ctx->primary_server_handler != NULL) { + DEBUG(SSSDBG_TRACE_FUNC, ("The primary server reconnection " + "is already scheduled\n")); + return EOK; + } + + ctx = talloc_zero(mem_ctx, struct be_primary_server_ctx); + if (ctx == NULL) { + return ENOMEM; + } + + ctx->bctx = bctx; + ctx->ev = ev; + ctx->svc = svc; + ctx->timeout = timeout_seconds; + + tv = tevent_timeval_current(); + tv = tevent_timeval_add(&tv, timeout_seconds, 0); + fo_ctx->primary_server_handler = tevent_add_timer(ev, bctx, tv, + be_primary_server_timeout, ctx); + if (fo_ctx->primary_server_handler == NULL) { + DEBUG(SSSDBG_CRIT_FAILURE, ("tevent_add_timer failed.\n")); + talloc_free(ctx); + return ENOMEM; + } + + DEBUG(SSSDBG_TRACE_INTERNAL, ("Primary server reactivation timeout set " + "to %lu seconds\n", timeout_seconds)); + return EOK; +} + + static void be_resolve_server_done(struct tevent_req *subreq); struct tevent_req *be_resolve_server_send(TALLOC_CTX *memctx, @@ -389,35 +524,66 @@ struct tevent_req *be_resolve_server_send(TALLOC_CTX *memctx, static void be_resolve_server_done(struct tevent_req *subreq) { + struct tevent_req *new_subreq; struct tevent_req *req = tevent_req_callback_data(subreq, struct tevent_req); struct be_resolve_server_state *state = tevent_req_data(req, struct be_resolve_server_state); - struct be_svc_callback *callback; int ret; + + ret = be_resolve_server_process(subreq, state, &new_subreq); + talloc_zfree(subreq); + if (ret == EAGAIN) { + tevent_req_set_callback(new_subreq, be_resolve_server_done, req); + return; + } else if (ret != EOK) { + goto fail; + } + + if (!fo_is_server_primary(state->srv)) { + /* FIXME: make the timeout configurable */ + ret = be_primary_server_timeout_activate(state->ctx, state->ev, + state->ctx, state->svc, + 30); + if (ret != EOK) { + goto fail; + } + } + + tevent_req_done(req); + return; + +fail: + DEBUG(SSSDBG_TRACE_LIBS, ("Server resolution failed: %d\n", ret)); + state->svc->first_resolved = NULL; + tevent_req_error(req, ret); +} + +errno_t be_resolve_server_process(struct tevent_req *subreq, + struct be_resolve_server_state *state, + struct tevent_req **new_subreq) +{ + errno_t ret; time_t srv_status_change; + struct be_svc_callback *callback; ret = fo_resolve_service_recv(subreq, &state->srv); - talloc_zfree(subreq); switch (ret) { case EOK: if (!state->srv) { - ret = EFAULT; - goto fail; + return EFAULT; } break; case ENOENT: /* all servers have been tried and none * was found good, go offline */ - ret = EIO; - goto fail; + return EIO; default: /* mark server as bad and retry */ if (!state->srv) { - ret = EFAULT; - goto fail; + return EFAULT; } DEBUG(SSSDBG_MINOR_FAILURE, ("Couldn't resolve server (%s), resolver returned (%d)\n", @@ -425,9 +591,8 @@ static void be_resolve_server_done(struct tevent_req *subreq) state->attempts++; if (state->attempts >= 10) { - DEBUG(2, ("Failed to find a server after 10 attempts\n")); - ret = EIO; - goto fail; + DEBUG(SSSDBG_OP_FAILURE, ("Failed to find a server after 10 attempts\n")); + return EIO; } /* now try next one */ @@ -437,12 +602,14 @@ static void be_resolve_server_done(struct tevent_req *subreq) state->ctx->be_fo->fo_ctx, state->svc->fo_service); if (!subreq) { - ret = ENOMEM; - goto fail; + return ENOMEM; } - tevent_req_set_callback(subreq, be_resolve_server_done, req); - return; + if (new_subreq) { + *new_subreq = subreq; + } + + return EAGAIN; } /* all fine we got the server */ @@ -452,8 +619,7 @@ static void be_resolve_server_done(struct tevent_req *subreq) } else if (state->svc->first_resolved == state->srv) { DEBUG(SSSDBG_OP_FAILURE, ("The fail over cycled through all available servers\n")); - ret = ENOENT; - goto fail; + return ENOENT; } if (DEBUG_IS_SET(SSSDBG_FUNC_DATA) && fo_get_server_name(state->srv)) { @@ -464,8 +630,7 @@ static void be_resolve_server_done(struct tevent_req *subreq) DEBUG(SSSDBG_CRIT_FAILURE, ("FATAL: No hostent available for server (%s)\n", fo_get_server_str_name(state->srv))); - ret = EFAULT; - goto fail; + return EFAULT; } inet_ntop(srvaddr->family, srvaddr->addr_list[0]->ipaddr, @@ -492,13 +657,7 @@ static void be_resolve_server_done(struct tevent_req *subreq) } } - tevent_req_done(req); - return; - -fail: - DEBUG(SSSDBG_TRACE_LIBS, ("Server resolution failed: %d\n", ret)); - state->svc->first_resolved = NULL; - tevent_req_error(req, ret); + return EOK; } int be_resolve_server_recv(struct tevent_req *req, struct fo_server **srv) diff --git a/src/providers/dp_backend.h b/src/providers/dp_backend.h index 41dd3f6d0..8e897a1bd 100644 --- a/src/providers/dp_backend.h +++ b/src/providers/dp_backend.h @@ -231,7 +231,8 @@ int be_fo_add_srv_server(struct be_ctx *ctx, enum be_fo_protocol proto, bool proto_fallback, void *user_data); int be_fo_add_server(struct be_ctx *ctx, const char *service_name, - const char *server, int port, void *user_data); + const char *server, int port, void *user_data, + bool primary); struct tevent_req *be_resolve_server_send(TALLOC_CTX *memctx, struct tevent_context *ev, diff --git a/src/providers/fail_over.c b/src/providers/fail_over.c index 5ef1436de..a16ab3363 100644 --- a/src/providers/fail_over.c +++ b/src/providers/fail_over.c @@ -72,6 +72,7 @@ struct fo_server { struct fo_server *prev; struct fo_server *next; + bool primary; void *user_data; int port; int port_status; @@ -577,7 +578,7 @@ fo_add_srv_server(struct fo_service *service, const char *srv, static struct fo_server * create_fo_server(struct fo_service *service, const char *name, - int port, void *user_data) + int port, void *user_data, bool primary) { struct fo_server *server; int ret; @@ -590,6 +591,7 @@ create_fo_server(struct fo_service *service, const char *name, server->user_data = user_data; server->service = service; server->port_status = DEFAULT_PORT_STATUS; + server->primary = primary; if (name != NULL) { ret = get_server_common(server, service->ctx, name, &server->common); @@ -621,26 +623,42 @@ fo_get_server_count(struct fo_service *service) return count; } +static bool fo_server_match(struct fo_server *server, + const char *name, + int port, + void *user_data) +{ + if (server->port != port || server->user_data != user_data) { + return false; + } + + if (name == NULL && server->common == NULL) { + return true; + } + + if (name != NULL && server->common != NULL) { + if (!strcasecmp(name, server->common->name)) + return true; + } + + return false; +} + int fo_add_server(struct fo_service *service, const char *name, int port, - void *user_data) + void *user_data, bool primary) { struct fo_server *server; DEBUG(3, ("Adding new server '%s', to service '%s'\n", name ? name : "(no name)", service->name)); DLIST_FOR_EACH(server, service->server_list) { - if (server->port != port || server->user_data != user_data) - continue; - if (name == NULL && server->common == NULL) { + if (fo_server_match(server, name, port, user_data)) { return EEXIST; - } else if (name != NULL && server->common != NULL) { - if (!strcasecmp(name, server->common->name)) - return EEXIST; } } - server = create_fo_server(service, name, port, user_data); + server = create_fo_server(service, name, port, user_data, primary); if (!server) { return ENOMEM; } @@ -658,7 +676,7 @@ get_first_server_entity(struct fo_service *service, struct fo_server **_server) /* If we already have a working server, use that one. */ server = service->active_server; if (server != NULL) { - if (service_works(server)) { + if (service_works(server) && fo_is_server_primary(server)) { goto done; } service->active_server = NULL; @@ -668,17 +686,27 @@ get_first_server_entity(struct fo_service *service, struct fo_server **_server) * Otherwise iterate through the server list. */ - /* First, try servers after the last one we tried. */ - if (service->last_tried_server != NULL) { + + /* First, try primary servers after the last one we tried. + * (only if the last one was primary as well) + */ + if (service->last_tried_server != NULL && + service->last_tried_server->primary) { DLIST_FOR_EACH(server, service->last_tried_server->next) { + /* Go only through primary servers */ + if (!server->primary) continue; + if (service_works(server)) { goto done; } } } - /* If none were found, try at the start. */ + /* If none were found, try at the start, primary first */ DLIST_FOR_EACH(server, service->server_list) { + /* First iterate only over primary servers */ + if (!server->primary) continue; + if (service_works(server)) { goto done; } @@ -687,6 +715,15 @@ get_first_server_entity(struct fo_service *service, struct fo_server **_server) } } + DLIST_FOR_EACH(server, service->server_list) { + /* Now iterate only over backup servers */ + if (server->primary) continue; + + if (service_works(server)) { + goto done; + } + } + service->last_tried_server = NULL; return ENOENT; @@ -727,6 +764,8 @@ set_lookup_hook(struct fo_server *server, struct tevent_req *req) return EOK; } + + /******************************************************************* * Get server to connect to. * *******************************************************************/ @@ -740,7 +779,6 @@ struct resolve_service_state { struct fo_ctx *fo_ctx; }; - static errno_t fo_resolve_service_activate_timeout(struct tevent_req *req, struct tevent_context *ev, const unsigned long timeout_seconds); static void fo_resolve_service_cont(struct tevent_req *subreq); @@ -1171,7 +1209,8 @@ resolve_srv_done(struct tevent_req *subreq) for (reply = reply_list; reply; reply = reply->next) { server = create_fo_server(state->service, reply->host, - reply->port, state->meta->user_data); + reply->port, state->meta->user_data, + true); if (!server) { ret = ENOMEM; goto fail; @@ -1451,6 +1490,12 @@ fo_get_server_hostent(struct fo_server *server) return server->common->rhostent; } +bool +fo_is_server_primary(struct fo_server *server) +{ + return server->primary; +} + time_t fo_get_server_hostname_last_change(struct fo_server *server) { diff --git a/src/providers/fail_over.h b/src/providers/fail_over.h index 8fbbe251b..b69e8a532 100644 --- a/src/providers/fail_over.h +++ b/src/providers/fail_over.h @@ -116,9 +116,8 @@ int fo_get_server_count(struct fo_service *service); * connection. If 'name' is NULL, no server resolution will be done. */ int fo_add_server(struct fo_service *service, - const char *name, - int port, - void *user_data); + const char *name, int port, + void *user_data, bool primary); int fo_add_srv_server(struct fo_service *service, @@ -180,6 +179,8 @@ const char *fo_get_server_str_name(struct fo_server *server); struct resolv_hostent *fo_get_server_hostent(struct fo_server *server); +bool fo_is_server_primary(struct fo_server *server); + time_t fo_get_server_hostname_last_change(struct fo_server *server); int fo_is_srv_lookup(struct fo_server *s); diff --git a/src/providers/ipa/ipa_common.c b/src/providers/ipa/ipa_common.c index 148a8b711..98a7c58f9 100644 --- a/src/providers/ipa/ipa_common.c +++ b/src/providers/ipa/ipa_common.c @@ -892,7 +892,7 @@ int ipa_service_init(TALLOC_CTX *memctx, struct be_ctx *ctx, continue; } - ret = be_fo_add_server(ctx, "IPA", list[i], 0, NULL); + ret = be_fo_add_server(ctx, "IPA", list[i], 0, NULL, true); if (ret && ret != EEXIST) { DEBUG(0, ("Failed to add server\n")); goto done; diff --git a/src/providers/krb5/krb5_common.c b/src/providers/krb5/krb5_common.c index e06827018..19fbd76ed 100644 --- a/src/providers/krb5/krb5_common.c +++ b/src/providers/krb5/krb5_common.c @@ -586,7 +586,7 @@ int krb5_service_init(TALLOC_CTX *memctx, struct be_ctx *ctx, } ret = be_fo_add_server(ctx, service_name, server_spec, (int) port, - list[i]); + list[i], true); if (ret && ret != EEXIST) { DEBUG(0, ("Failed to add server\n")); goto done; diff --git a/src/providers/ldap/ldap_common.c b/src/providers/ldap/ldap_common.c index 29aa029ba..24c6e124e 100644 --- a/src/providers/ldap/ldap_common.c +++ b/src/providers/ldap/ldap_common.c @@ -1197,8 +1197,8 @@ int sdap_service_init(TALLOC_CTX *memctx, struct be_ctx *ctx, talloc_steal(service, list[i]); - ret = be_fo_add_server(ctx, service->name, - lud->lud_host, lud->lud_port, list[i]); + ret = be_fo_add_server(ctx, service->name, lud->lud_host, + lud->lud_port, list[i], true); ldap_free_urldesc(lud); if (ret) { goto done; diff --git a/src/tests/fail_over-tests.c b/src/tests/fail_over-tests.c index 8d6bdd3de..6f4843ca2 100644 --- a/src/tests/fail_over-tests.c +++ b/src/tests/fail_over-tests.c @@ -230,14 +230,15 @@ START_TEST(test_fo_resolve_service) fail_if(fo_new_service(ctx->fo_ctx, "ntp", &service[2]) != EOK); /* Add servers. */ - fail_if(fo_add_server(service[0], "localhost", 20, NULL) != EOK); - fail_if(fo_add_server(service[0], "127.0.0.1", 80, NULL) != EOK); + fail_if(fo_add_server(service[0], "localhost", 20, NULL, true) != EOK); + fail_if(fo_add_server(service[0], "127.0.0.1", 80, NULL, false) != EOK); - fail_if(fo_add_server(service[1], "localhost", 30, NULL) != EOK); - fail_if(fo_add_server(service[1], "127.0.0.1", 389, NULL) != EOK); - fail_if(fo_add_server(service[1], "127.0.0.1", 389, NULL) != EEXIST); + fail_if(fo_add_server(service[1], "localhost", 30, NULL, false) != EOK); + fail_if(fo_add_server(service[1], "127.0.0.1", 389, NULL, true) != EOK); + fail_if(fo_add_server(service[1], "127.0.0.1", 389, NULL, true) != EEXIST); + fail_if(fo_add_server(service[1], "127.0.0.1", 389, NULL, false) != EEXIST); - fail_if(fo_add_server(service[2], NULL, 123, NULL) != EOK); + fail_if(fo_add_server(service[2], NULL, 123, NULL, true) != EOK); /* Make requests. */ get_request(ctx, service[0], EOK, 20, PORT_WORKING, -1); |