diff options
author | Nicolas Williams <nico@cryptonector.com> | 2012-09-24 21:09:17 -0500 |
---|---|---|
committer | Greg Hudson <ghudson@mit.edu> | 2012-10-05 13:33:18 -0400 |
commit | f1c85fbb0ab9e62b2790647b2681aec4d5fa4585 (patch) | |
tree | 22737dc9984a268c2b55c7229190d6e0ff5f27c6 /src/slave | |
parent | 70a119d4dc7ed7a94cfc32c523352af1d000e1c7 (diff) | |
download | krb5-f1c85fbb0ab9e62b2790647b2681aec4d5fa4585.tar.gz krb5-f1c85fbb0ab9e62b2790647b2681aec4d5fa4585.tar.xz krb5-f1c85fbb0ab9e62b2790647b2681aec4d5fa4585.zip |
Improve kpropd behavior in iprop mode
- Make kpropd in iprop mode fork a child to listen for kprops from the
  master. The child writes progress and outcome reports to the parent
  for each kprop. This fixes a race between asking for a full resync
  and setting up a listener socket for it.
- Add runonce (-t) for kpropd do_standalone() too.
- Add a new iprop parameter: iprop_resync_timeout. kpropd will keep
  asking for incremental updates while waiting for a full resync to
  finish, and will re-request a full resync if kadmind continues to
  indicate that one is needed after this timeout passes since the
  previous full resync was requested.
- Allow polling intervals less than 10 seconds.
[ghudson@mit.edu: split out debug output changes; note polling interval
change in commit message]
ticket: 7373
Diffstat (limited to 'src/slave')
-rw-r--r-- | src/slave/kpropd.c | 396 |
1 files changed, 174 insertions, 222 deletions
diff --git a/src/slave/kpropd.c b/src/slave/kpropd.c index 309717dd99..b2899bb8f3 100644 --- a/src/slave/kpropd.c +++ b/src/slave/kpropd.c @@ -101,17 +101,11 @@ extern int daemon(int, int); #endif #define SYSLOG_CLASS LOG_DAEMON -#define INITIAL_TIMER 10 char *def_realm = NULL; int runonce = 0; /* - * Global fd to close upon alarm time-out. - */ -volatile int gfd = -1; - -/* * This struct simulates the use of _kadm5_server_handle_t * * This is a COPY of kadm5_server_handle_t from @@ -132,11 +126,16 @@ typedef struct _kadm5_iprop_handle_t { static char *kprop_version = KPROP_PROT_VERSION; +static kadm5_config_params params; + char *progname; int debug = 0; +int nodaemon = 0; char *srvtab = 0; int standalone = 0; +pid_t fullprop_child = (pid_t)-1; + krb5_principal server; /* This is our server principal name */ krb5_principal client; /* This is who we're talking to */ krb5_context kpropd_context; @@ -156,7 +155,7 @@ char **db_args = NULL; int db_args_size = 0; void PRS(char**); -int do_standalone(iprop_role iproprole); +void do_standalone(void); void doit(int); krb5_error_code do_iprop(kdb_log_context *log_ctx); void kerberos_authenticate(krb5_context, int, krb5_principal *, @@ -183,59 +182,142 @@ static void usage() exit(1); } +typedef void (*sig_handler_fn)(int sig); + +static void +signal_wrapper(int sig, sig_handler_fn handler) +{ +#ifdef POSIX_SIGNALS + struct sigaction s_action; + memset(&s_action, 0, sizeof(s_action)); + sigemptyset(&s_action.sa_mask); + s_action.sa_handler = handler; + sigaction(sig, &s_action, NULL); +#else + signal(sig, handler); +#endif +} + +static void +alarm_handler(int sig) +{ + static char *timeout_msg = "Full propagation timed out\n"; + write(STDERR_FILENO, timeout_msg, strlen(timeout_msg)); + exit(1); +} + +static void +kill_do_standalone(int sig) +{ + if (fullprop_child > 0) { + if (debug) { + fprintf(stderr, _("Killing fullprop child (%d)\n"), + (int)fullprop_child); + } + kill(fullprop_child, sig); + } + /* Make sure our exit 
status code reflects our having been signaled */ + signal_wrapper(sig, SIG_DFL); + kill(getpid(), sig); +} + +static void +atexit_kill_do_standalone(void) +{ + if (fullprop_child > 0) + kill(fullprop_child, SIGHUP); +} + int main(argc, argv) int argc; char **argv; { krb5_error_code retval; - int ret = 0; kdb_log_context *log_ctx; + int devnull, sock; setlocale(LC_ALL, ""); PRS(argv); log_ctx = kpropd_context->kdblog_context; - { -#ifdef POSIX_SIGNALS - struct sigaction s_action; - memset(&s_action, 0, sizeof(s_action)); - sigemptyset(&s_action.sa_mask); - s_action.sa_handler = SIG_IGN; - sigaction(SIGPIPE, &s_action, NULL); -#else - signal(SIGPIPE, SIG_IGN); -#endif - } + signal_wrapper(SIGPIPE, SIG_IGN); - if (log_ctx && (log_ctx->iproprole == IPROP_SLAVE)) { + if (standalone) { + /* "ready" is a sentinel for the test framework. */ + if (!debug && !nodaemon) { + daemon(0, 0); + } else { + printf(_("ready\n")); + fflush(stdout); + } + } else { /* - * We wanna do iprop ! + * We're an inetd nowait service. Let's not risk anything + * read/write from/to the inetd socket unintentionally. 
*/ - retval = do_iprop(log_ctx); - if (retval) { - com_err(progname, retval, - _("do_iprop failed.\n")); + devnull = open("/dev/null", O_RDWR); + if (devnull == -1) { + syslog(LOG_ERR, _("Could not open /dev/null: %s"), + strerror(errno)); exit(1); } - } else { - if (standalone) - ret = do_standalone(IPROP_NULL); - else - doit(0); + sock = dup(0); + if (sock == -1) { + syslog(LOG_ERR, _("Could not dup the inetd socket: %s"), + strerror(errno)); + exit(1); + } + + dup2(devnull, STDIN_FILENO); + dup2(devnull, STDOUT_FILENO); + dup2(devnull, STDERR_FILENO); + close(devnull); + doit(sock); + exit(0); } - exit(ret); -} + if (log_ctx == NULL || log_ctx->iproprole != IPROP_SLAVE) { + do_standalone(); + /* do_standalone() should never return */ + assert(0); + } -static void resync_alarm(int sn) -{ - close (gfd); - if (debug) - fprintf(stderr, _("resync_alarm: closing fd: %d\n"), gfd); - gfd = -1; + /* + * This is the iprop case. We'll fork a child to run do_standalone(). + * The parent will run do_iprop(). We try to kill the child if we + * get killed. + */ + signal_wrapper(SIGHUP, kill_do_standalone); + signal_wrapper(SIGINT, kill_do_standalone); + signal_wrapper(SIGQUIT, kill_do_standalone); + signal_wrapper(SIGTERM, kill_do_standalone); + signal_wrapper(SIGSEGV, kill_do_standalone); + atexit(atexit_kill_do_standalone); + fullprop_child = fork(); + switch (fullprop_child) { + case -1: + com_err(progname, errno, _("do_iprop failed.\n")); + break; + case 0: + do_standalone(); + /* do_standalone() should never return */ + /* NOTREACHED */ + break; + default: + retval = do_iprop(log_ctx); + /* do_iprop() can return due to failures and runonce. 
*/ + kill(fullprop_child, SIGHUP); + wait(NULL); + if (retval) + com_err(progname, retval, _("do_iprop failed.\n")); + else + exit(0); + } + + exit(1); } /* Use getaddrinfo to determine a wildcard listener address, preferring @@ -257,19 +339,16 @@ get_wildcard_addr(struct addrinfo **res) return getaddrinfo(NULL, port, &hints, res); } -int do_standalone(iprop_role iproprole) +void +do_standalone() { struct sockaddr_in frominet; struct addrinfo *res; int finet, s; GETPEERNAME_ARG3_TYPE fromlen; - int ret, error, val; - /* - * Timer for accept/read calls, in case of network type errors. - */ - int backoff_timer = INITIAL_TIMER; - -retry: + int ret, error, val, status; + pid_t child_pid; + pid_t wait_pid; error = get_wildcard_addr(&res); if (error != 0) { @@ -296,60 +375,15 @@ retry: com_err(progname, errno, _("while unsetting IPV6_V6ONLY option")); #endif - /* - * We need to close the socket immediately if iprop is enabled, - * since back-to-back full resyncs are possible, so we do not - * linger around for too long - */ - if (iproprole == IPROP_SLAVE) { - struct linger linger; - - linger.l_onoff = 1; - linger.l_linger = 2; - if (setsockopt(finet, SOL_SOCKET, SO_LINGER, - (void *)&linger, sizeof(linger)) < 0) - com_err(progname, errno, - _("while setting socket option (SO_LINGER)")); - /* - * We also want to set a timer so that the slave is not waiting - * until infinity for an update from the master. 
- */ - gfd = finet; - signal(SIGALRM, resync_alarm); - if (debug) { - fprintf(stderr, "do_standalone: setting resync alarm to %d\n", - backoff_timer); - } - if (alarm(backoff_timer) != 0) { - if (debug) { - fprintf(stderr, - _("%s: alarm already set\n"), progname); - } - } - backoff_timer *= 2; - } if ((ret = bind(finet, res->ai_addr, res->ai_addrlen)) < 0) { com_err(progname, errno, _("while binding listener socket")); exit(1); } - if (!debug && iproprole != IPROP_SLAVE) - daemon(1, 0); -#ifdef PID_FILE - if ((pidfile = fopen(PID_FILE, "w")) != NULL) { - fprintf(pidfile, "%d\n", getpid()); - fclose(pidfile); - } else - com_err(progname, errno, - _("while opening pid file %s for writing"), PID_FILE); -#endif if (listen(finet, 5) < 0) { com_err(progname, errno, "in listen call"); exit(1); } while (1) { - int child_pid; - int status; - memset(&frominet, 0, sizeof(frominet)); fromlen = sizeof(frominet); if (debug) @@ -361,30 +395,9 @@ retry: if (e != EINTR) { com_err(progname, e, _("while accepting connection")); - if (e != EBADF) - backoff_timer = INITIAL_TIMER; } - /* - * If we got EBADF, an alarm signal handler closed - * the file descriptor on us. - */ - if (e != EBADF) - close(finet); - /* - * An alarm could have been set and the fd closed, we - * should retry in case of transient network error for - * up to a couple of minutes. - */ - if (backoff_timer > 120) - return EINTR; - goto retry; } - alarm(0); - gfd = -1; - if (debug && iproprole != IPROP_SLAVE) - child_pid = 0; - else - child_pid = fork(); + child_pid = fork(); switch (child_pid) { case -1: com_err(progname, errno, _("while forking")); @@ -396,31 +409,22 @@ retry: close(s); _exit(0); default: - /* - * Errors should not be considered fatal in the - * iprop case as we could have transient type - * errors, such as network outage, etc. Sleeping - * 3s for 2s linger interval. 
- */ - if (wait(&status) < 0) { + do { + wait_pid = waitpid(child_pid, &status, 0); + } while (wait_pid == -1 && errno == EINTR); + if (wait_pid == -1) { com_err(progname, errno, _("while waiting to receive database")); - if (iproprole != IPROP_SLAVE) - exit(1); - sleep(3); + exit(1); } close(s); - if (iproprole == IPROP_SLAVE) { - close(finet); - if ((ret = WEXITSTATUS(status)) != 0) - return (ret); - } + + if (runonce) + break; } - if (iproprole == IPROP_SLAVE) - break; } - return 0; + exit(0); } void doit(fd) @@ -437,23 +441,8 @@ void doit(fd) int database_fd; char host[INET6_ADDRSTRLEN+1]; - if (kpropd_context->kdblog_context && - kpropd_context->kdblog_context->iproprole == IPROP_SLAVE) { - /* - * We also want to set a timer so that the slave is not waiting - * until infinity for an update from the master. - */ - if (debug) - fprintf(stderr, "doit: setting resync alarm to 5s\n"); - signal(SIGALRM, resync_alarm); - gfd = fd; - if (alarm(INITIAL_TIMER) != 0) { - if (debug) { - fprintf(stderr, - _("%s: alarm already set\n"), progname); - } - } - } + signal_wrapper(SIGALRM, alarm_handler); + alarm(params.iprop_resync_timeout); fromlen = sizeof (from); if (getpeername(fd, (struct sockaddr *) &from, &fromlen) < 0) { #ifdef ENOTSOCK @@ -488,12 +477,6 @@ void doit(fd) */ kerberos_authenticate(kpropd_context, fd, &client, &etype, &from); - /* - * Turn off alarm upon successful authentication from master. - */ - alarm(0); - gfd = -1; - if (!authorized_principal(kpropd_context, client, etype)) { char *name; @@ -601,10 +584,13 @@ full_resync(CLIENT *clnt) } /* - * Routine to handle incremental update transfer(s) from master KDC + * Beg for incrementals from the KDC. + * + * Returns 0 on success IFF runonce is true. + * Returns non-zero on failure due to errors. 
*/ -kadm5_config_params params; -krb5_error_code do_iprop(kdb_log_context *log_ctx) +krb5_error_code +do_iprop(kdb_log_context *log_ctx) { kadm5_ret_t retval; krb5_ccache cc; @@ -615,24 +601,21 @@ krb5_error_code do_iprop(kdb_log_context *log_ctx) unsigned int pollin, backoff_time; int backoff_cnt = 0; int reinit_cnt = 0; - int ret; - int frdone = 0; + time_t frrequested = 0; + time_t now; kdb_incr_result_t *incr_ret; - static kdb_last_t mylast; + kdb_last_t mylast; kdb_fullresync_result_t *full_ret; kadm5_iprop_handle_t handle; kdb_hlog_t *ulog; - if (!debug) - daemon(0, 0); - ulog = log_ctx->ulog; pollin = params.iprop_poll_time; - if (pollin < 10) + if (pollin == 0) pollin = 10; /* @@ -643,7 +626,7 @@ krb5_error_code do_iprop(kdb_log_context *log_ctx) if (retval) { com_err(progname, retval, _("Unable to get default realm")); - exit(1); + return retval; } } @@ -658,7 +641,7 @@ krb5_error_code do_iprop(kdb_log_context *log_ctx) _("%s: unable to get kiprop host based " "service name for realm %s\n"), progname, def_realm); - exit(1); + return retval; } } @@ -669,7 +652,7 @@ krb5_error_code do_iprop(kdb_log_context *log_ctx) com_err(progname, retval, _("while opening default " "credentials cache")); - exit(1); + return retval; } retval = krb5_sname_to_principal(kpropd_context, NULL, KIPROP_SVC_NAME, @@ -677,7 +660,7 @@ krb5_error_code do_iprop(kdb_log_context *log_ctx) if (retval) { com_err(progname, retval, _("while trying to construct host service principal")); - exit(1); + return retval; } /* XXX referrals? */ @@ -691,7 +674,7 @@ krb5_error_code do_iprop(kdb_log_context *log_ctx) if (r->data == NULL) { com_err(progname, retval, _("while determining local service principal name")); - exit(1); + return retval; } /* XXX Memory leak: Old r->data value. 
*/ } @@ -700,7 +683,7 @@ krb5_error_code do_iprop(kdb_log_context *log_ctx) com_err(progname, retval, _("while canonicalizing principal name")); krb5_free_principal(kpropd_context, iprop_svc_principal); - exit(1); + return retval; } krb5_free_principal(kpropd_context, iprop_svc_principal); @@ -792,14 +775,19 @@ reinit: case UPDATE_FULL_RESYNC_NEEDED: /* - * We dont do a full resync again, if the last - * X'fer was a resync and if the master sno is - * still "0", i.e. no updates so far. + * If we're already asked for a full resync and we still + * need one and the last one hasn't timed out then just keep + * asking for updates as eventually the resync will finish + * (or, if it times out we'll just try again). Note that + * doit() also applies a timeout to the full resync, thus + * it's OK for us to do the same here. */ - if ((frdone == 1) && (incr_ret->lastentry.last_sno - == 0)) { + now = time(NULL); + if (frrequested && + (now - frrequested) < params.iprop_resync_timeout) { break; } else { + frrequested = now; full_ret = full_resync(handle->clnt); if (full_ret == (kdb_fullresync_result_t *) NULL) { @@ -817,28 +805,6 @@ reinit: switch (full_ret->ret) { case UPDATE_OK: backoff_cnt = 0; - /* - * We now listen on the kprop port for - * the full dump - */ - ret = do_standalone(log_ctx->iproprole); - if (debug) { - if (ret) - fprintf(stderr, - _("Full resync " - "was unsuccessful\n")); - else - fprintf(stderr, - _("Full resync " - "was successful\n")); - } - if (ret) { - syslog(LOG_WARNING, - _("kpropd: Full resync, invalid return.")); - frdone = 0; - backoff_cnt++; - } else - frdone = 1; break; case UPDATE_BUSY: @@ -852,7 +818,6 @@ reinit: case UPDATE_NIL: default: backoff_cnt = 0; - frdone = 0; syslog(LOG_ERR, _("kpropd: Full resync," " invalid return from master KDC.")); break; @@ -871,7 +836,7 @@ reinit: case UPDATE_OK: backoff_cnt = 0; - frdone = 0; + frrequested = 0; /* * ulog_replay() will convert the ulog updates to db @@ -920,7 +885,7 @@ reinit: 
fprintf(stderr, _("Master, slave KDC's " "are in-sync, no updates\n")); backoff_cnt = 0; - frdone = 0; + frrequested = 0; break; default: @@ -965,7 +930,7 @@ done: if ((retval = krb5_cc_close(kpropd_context, cc))) { com_err(progname, retval, _("while closing default ccache")); - exit(1); + return retval; } if (def_realm && kpropd_context) krb5_free_default_realm(kpropd_context, def_realm); @@ -977,7 +942,7 @@ done: if (runonce == 1) return (0); else - exit(1); + return 1; } @@ -1096,6 +1061,9 @@ void PRS(argv) usage(); word = 0; break; + case 'D': + nodaemon++; + break; case 'd': debug++; break; @@ -1117,7 +1085,7 @@ void PRS(argv) * Undocumented option - for testing only. * * Option to run the kpropd server exactly - * once (this is true only if iprop is enabled). + * once. */ runonce = 1; break; @@ -1580,7 +1548,7 @@ load_database(context, kdb_util, database_file_name) char *database_file_name; { static char *edit_av[10]; - int error_ret, save_stderr = -1; + int error_ret; int child_pid; int count; @@ -1594,7 +1562,6 @@ load_database(context, kdb_util, database_file_name) #else int waitb; #endif - krb5_error_code retval; kdb_log_context *log_ctx; if (debug) @@ -1624,23 +1591,8 @@ load_database(context, kdb_util, database_file_name) com_err(progname, errno, _("while trying to fork %s"), kdb_util); exit(1); case 0: - if (!debug) { - save_stderr = dup(2); - close(0); - close(1); - close(2); - open("/dev/null", O_RDWR); - dup(0); - dup(0); - } - - if (execv(kdb_util, edit_av) < 0) - retval = errno; - else - retval = 0; - if (!debug) - dup2(save_stderr, 2); - com_err(progname, retval, _("while trying to exec %s"), kdb_util); + execv(kdb_util, edit_av); + com_err(progname, errno, _("while trying to exec %s"), kdb_util); _exit(1); /*NOTREACHED*/ default: |