Pulled from SVN, then munged to apply to 1.9. Modifies cm.h so that a struct select_state has an alternate layout when USE_POLL is defined, and if we detect at configure-time, have sendto_kdc.c define USE_POLL to force its use. Adapts sendto_kdc.c to handle both cases, so that the previous behavior is preserved when is not found. RT#6905 Index: src/include/cm.h =================================================================== --- src/include/cm.h (revision 24907) +++ src/include/cm.h (revision 24908) @@ -24,11 +24,20 @@ * or implied warranty. */ -/* Since fd_set is large on some platforms (8K on AIX 5.2), this - probably shouldn't be allocated in automatic storage. */ +/* + * Since fd_set is large on some platforms (8K on AIX 5.2), this probably + * shouldn't be allocated in automatic storage. Define USE_POLL and + * MAX_POLLFDS in the consumer of this header file to use poll state instead of + * select state. + */ struct select_state { - int max, nfds; +#ifdef USE_POLL + struct pollfd fds[MAX_POLLFDS]; +#else + int max; fd_set rfds, wfds, xfds; +#endif + int nfds; struct timeval end_time; /* magic: tv_sec==0 => never time out */ }; Index: src/configure.in =================================================================== --- src/configure.in (revision 24907) +++ src/configure.in (revision 24908) @@ -67,7 +67,7 @@ ]) AC_SUBST(LIBUTIL) # for kdc -AC_CHECK_HEADERS(syslog.h stdarg.h sys/select.h sys/sockio.h ifaddrs.h unistd.h) +AC_CHECK_HEADERS(syslog.h stdarg.h sys/sockio.h ifaddrs.h unistd.h) AC_CHECK_FUNCS(openlog syslog closelog strftime vsprintf vasprintf vsnprintf) AC_CHECK_FUNCS(strlcpy) EXTRA_SUPPORT_SYMS= @@ -472,7 +472,7 @@ AC_DEFINE(POSIX_TERMIOS,1,[Define if termios.h exists and tcsetattr exists]))]) KRB5_SIGTYPE -AC_CHECK_HEADERS(stdlib.h string.h stddef.h sys/types.h sys/file.h sys/param.h sys/stat.h sys/time.h netinet/in.h sys/uio.h sys/filio.h sys/select.h time.h paths.h errno.h) +AC_CHECK_HEADERS(poll.h stdlib.h string.h stddef.h sys/types.h sys/file.h sys/param.h sys/stat.h sys/time.h netinet/in.h sys/uio.h sys/filio.h sys/select.h time.h paths.h errno.h) AC_HEADER_STDARG KRB5_AC_INET6 Index: src/lib/krb5/os/cm.c =================================================================== --- src/lib/krb5/os/cm.c (revision 0) +++ src/lib/krb5/os/cm.c (revision 24908) @@ -0,0 +1,97 @@ +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* lib/krb5/os/cm.c - Connection manager functions */ +/* + * Copyright (C) 2011 by the Massachusetts Institute of Technology. + * All rights reserved. + * + * Export of this software from the United States of America may + * require a specific license from the United States Government. + * It is the responsibility of any person or organization contemplating + * export to obtain such a license before exporting. + * + * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and + * distribute this software and its documentation for any purpose and + * without fee is hereby granted, provided that the above copyright + * notice appear in all copies and that both that copyright notice and + * this permission notice appear in supporting documentation, and that + * the name of M.I.T. not be used in advertising or publicity pertaining + * to distribution of the software without specific, written prior + * permission. Furthermore if you modify this software you must label + * your software as modified software and not distribute it in such a + * fashion that it might be confused with the original M.I.T. software. + * M.I.T. makes no representations about the suitability of + * this software for any purpose. It is provided "as is" without express + * or implied warranty. + */ + +/* + * This file include krb5int_cm_call_select, which is used by + * lib/apputils/net-server.c and sometimes by sendto_kdc.c. + */ + +#include "k5-int.h" +#ifdef HAVE_SYS_SELECT_H +#include +#endif +#ifdef _WIN32 +#include +#endif +#include "cm.h" + +int +k5_getcurtime(struct timeval *tvp) +{ +#ifdef _WIN32 + struct _timeb tb; + _ftime(&tb); + tvp->tv_sec = tb.time; + tvp->tv_usec = tb.millitm * 1000; + /* Can _ftime fail? */ + return 0; +#else + if (gettimeofday(tvp, 0)) + return errno; + return 0; +#endif +} + +/* + * Call select and return results. + * Input: interesting file descriptors and absolute timeout + * Output: select return value (-1 or num fds ready) and fd_sets + * Return: 0 (for i/o available or timeout) or error code. + */ +krb5_error_code +krb5int_cm_call_select (const struct select_state *in, + struct select_state *out, int *sret) +{ + struct timeval now, *timo; + krb5_error_code e; + + *out = *in; + e = k5_getcurtime(&now); + if (e) + return e; + if (out->end_time.tv_sec == 0) + timo = 0; + else { + timo = &out->end_time; + out->end_time.tv_sec -= now.tv_sec; + out->end_time.tv_usec -= now.tv_usec; + if (out->end_time.tv_usec < 0) { + out->end_time.tv_usec += 1000000; + out->end_time.tv_sec--; + } + if (out->end_time.tv_sec < 0) { + *sret = 0; + return 0; + } + } + + *sret = select(out->max, &out->rfds, &out->wfds, &out->xfds, timo); + e = SOCKET_ERRNO; + + if (*sret < 0) + return e; + return 0; +} Index: src/lib/krb5/os/Makefile.in =================================================================== --- src/lib/krb5/os/Makefile.in (revision 24907) +++ src/lib/krb5/os/Makefile.in (revision 24908) @@ -18,6 +18,7 @@ def_realm.o \ ccdefname.o \ changepw.o \ + cm.o \ dnsglue.o \ dnssrv.o \ free_krbhs.o \ @@ -62,6 +63,7 @@ $(OUTPRE)def_realm.$(OBJEXT) \ $(OUTPRE)ccdefname.$(OBJEXT) \ $(OUTPRE)changepw.$(OBJEXT) \ + $(OUTPRE)cm.$(OBJEXT) \ $(OUTPRE)dnsglue.$(OBJEXT) \ $(OUTPRE)dnssrv.$(OBJEXT) \ $(OUTPRE)free_krbhs.$(OBJEXT) \ @@ -106,6 +108,7 @@ $(srcdir)/def_realm.c \ $(srcdir)/ccdefname.c \ $(srcdir)/changepw.c \ + $(srcdir)/cm.c \ $(srcdir)/dnsglue.c \ $(srcdir)/dnssrv.c \ $(srcdir)/free_krbhs.c \ Index: src/lib/krb5/os/os-proto.h =================================================================== --- src/lib/krb5/os/os-proto.h (revision 24907) +++ src/lib/krb5/os/os-proto.h (revision 24908) @@ -32,6 +32,10 @@ #ifndef KRB5_LIBOS_INT_PROTO__ #define KRB5_LIBOS_INT_PROTO__ +#ifdef HAVE_SYS_TIME_H +#include +#endif + struct addrlist; krb5_error_code krb5_locate_kdc(krb5_context, const krb5_data *, struct addrlist *, int, int, int); @@ -101,6 +105,8 @@ /* The io vector is *not* const here, unlike writev()! */ int krb5int_net_writev (krb5_context, int, sg_buf *, int); +int k5_getcurtime(struct timeval *tvp); + #include "k5-thread.h" extern k5_mutex_t krb5int_us_time_mutex; Index: src/lib/krb5/os/sendto_kdc.c =================================================================== --- src/lib/krb5/os/sendto_kdc.c (revision 24907) +++ src/lib/krb5/os/sendto_kdc.c (revision 24908) @@ -30,17 +30,16 @@ #include "fake-addrinfo.h" #include "k5-int.h" -#ifdef HAVE_SYS_TIME_H -#include -#else -#include -#endif #include "os-proto.h" #ifdef _WIN32 #include #endif -#ifdef _AIX +#if defined(HAVE_POLL_H) +#include +#define USE_POLL +#define MAX_POLLFDS 1024 +#elif defined(HAVE_SYS_SELECT_H) #include #endif @@ -168,29 +167,6 @@ p = strerror(err); putstr(p); break; - case 'F': - /* %F => fd_set *, fd_set *, fd_set *, int */ - rfds = va_arg(args, fd_set *); - wfds = va_arg(args, fd_set *); - xfds = va_arg(args, fd_set *); - maxfd = va_arg(args, int); - - for (i = 0; i < maxfd; i++) { - int r = FD_ISSET(i, rfds); - int w = wfds && FD_ISSET(i, wfds); - int x = xfds && FD_ISSET(i, xfds); - if (r || w || x) { - putf(" %d", i); - if (r) - putstr("r"); - if (w) - putstr("w"); - if (x) - putstr("x"); - } - } - putstr(" "); - break; case 's': /* %s => char * */ p = va_arg(args, const char *); @@ -437,75 +413,154 @@ #include "cm.h" -static int -getcurtime (struct timeval *tvp) +/* + * Currently only sendto_kdc.c knows how to use poll(); the other candidate + * user, lib/apputils/net-server.c, is stuck using select() for the moment + * since it is entangled with the RPC library. The following cm_* functions + * are not fully generic, are O(n^2) in the poll case, and are limited to + * handling 1024 connections (in order to maintain a constant-sized selstate). + * More rearchitecting would be appropriate before extending this support to + * the KDC and kadmind. + */ + +static void +cm_init_selstate(struct select_state *selstate) { -#ifdef _WIN32 - struct _timeb tb; - _ftime(&tb); - tvp->tv_sec = tb.time; - tvp->tv_usec = tb.millitm * 1000; - /* Can _ftime fail? */ - return 0; + selstate->nfds = 0; + selstate->end_time.tv_sec = selstate->end_time.tv_usec = 0; +#ifndef USE_POLL + selstate->max = 0; + selstate->nfds = 0; + FD_ZERO(&selstate->rfds); + FD_ZERO(&selstate->wfds); + FD_ZERO(&selstate->xfds); +#endif +} + +static krb5_boolean +cm_add_fd(struct select_state *selstate, int fd, unsigned int ssflags) +{ +#ifdef USE_POLL + if (selstate->nfds >= MAX_POLLFDS) + return FALSE; + selstate->fds[selstate->nfds].fd = fd; + selstate->fds[selstate->nfds].events = 0; + if (ssflags & SSF_READ) + selstate->fds[selstate->nfds].events |= POLLIN; + if (ssflags & SSF_WRITE) + selstate->fds[selstate->nfds].events |= POLLOUT; #else - if (gettimeofday(tvp, 0)) { - dperror("gettimeofday"); - return errno; +#ifndef _WIN32 /* On Windows FD_SETSIZE is a count, not a max value. */ + if (fd >= FD_SETSIZE) + return FALSE; +#endif + if (ssflags & SSF_READ) + FD_SET(fd, &selstate->rfds); + if (ssflags & SSF_WRITE) + FD_SET(fd, &selstate->wfds); + if (ssflags & SSF_EXCEPTION) + FD_SET(fd, &selstate->xfds); + if (selstate->max <= fd) + selstate->max = fd + 1; +#endif + selstate->nfds++; + return TRUE; +} + +static void +cm_remove_fd(struct select_state *selstate, int fd) +{ +#ifdef USE_POLL + int i; + + /* Find the FD in the array and move the last entry to its place. */ + assert(selstate->nfds > 0); + for (i = 0; i < selstate->nfds && selstate->fds[i].fd != fd; i++); + assert(i < selstate->nfds); + selstate->fds[i] = selstate->fds[selstate->nfds - 1]; +#else + FD_CLR(fd, &selstate->rfds); + FD_CLR(fd, &selstate->wfds); + FD_CLR(fd, &selstate->xfds); + if (selstate->max == 1 + fd) { + while (selstate->max > 0 + && ! FD_ISSET(selstate->max-1, &selstate->rfds) + && ! FD_ISSET(selstate->max-1, &selstate->wfds) + && ! FD_ISSET(selstate->max-1, &selstate->xfds)) + selstate->max--; + dprint("new max_fd + 1 is %d\n", selstate->max); } - return 0; #endif + selstate->nfds--; } -/* - * Call select and return results. - * Input: interesting file descriptors and absolute timeout - * Output: select return value (-1 or num fds ready) and fd_sets - * Return: 0 (for i/o available or timeout) or error code. - */ -krb5_error_code -krb5int_cm_call_select (const struct select_state *in, - struct select_state *out, int *sret) +static void +cm_unset_write(struct select_state *selstate, int fd) { - struct timeval now, *timo; - krb5_error_code e; +#ifdef USE_POLL + int i; - *out = *in; - e = getcurtime(&now); - if (e) - return e; - if (out->end_time.tv_sec == 0) - timo = 0; + for (i = 0; i < selstate->nfds && selstate->fds[i].fd != fd; i++); + assert(i < selstate->nfds); + selstate->fds[i].events &= ~POLLOUT; +#else + FD_CLR(fd, &selstate->wfds); +#endif +} + +static krb5_error_code +cm_select_or_poll(const struct select_state *in, struct select_state *out, + int *sret) +{ +#ifdef USE_POLL + struct timeval now; + int e, timeout; + + if (in->end_time.tv_sec == 0) + timeout = -1; else { - timo = &out->end_time; - out->end_time.tv_sec -= now.tv_sec; - out->end_time.tv_usec -= now.tv_usec; - if (out->end_time.tv_usec < 0) { - out->end_time.tv_usec += 1000000; - out->end_time.tv_sec--; - } - if (out->end_time.tv_sec < 0) { - *sret = 0; - return 0; - } + e = k5_getcurtime(&now); + if (e) + return e; + timeout = (in->end_time.tv_sec - now.tv_sec) * 1000 + + (in->end_time.tv_usec - now.tv_usec) / 1000; } - dprint("selecting on max=%d sockets [%F] timeout %t\n", - out->max, - &out->rfds, &out->wfds, &out->xfds, out->max, - timo); - *sret = select(out->max, &out->rfds, &out->wfds, &out->xfds, timo); + /* We don't need a separate copy of the selstate for poll, but use one + * anyone for consistency with the select wrapper. */ + *out = *in; + *sret = poll(out->fds, out->nfds, timeout); e = SOCKET_ERRNO; + return (*sret < 0) ? e : 0; +#else + /* Use the select wrapper from cm.c. */ + return krb5int_cm_call_select(in, out, sret); +#endif +} - dprint("select returns %d", *sret); - if (*sret < 0) - dprint(", error = %E\n", e); - else if (*sret == 0) - dprint(" (timeout)\n"); - else - dprint(":%F\n", &out->rfds, &out->wfds, &out->xfds, out->max); +static unsigned int +cm_get_ssflags(struct select_state *selstate, int fd) +{ + unsigned int ssflags = 0; +#ifdef USE_POLL + int i; - if (*sret < 0) - return e; - return 0; + for (i = 0; i < selstate->nfds && selstate->fds[i].fd != fd; i++); + assert(i < selstate->nfds); + if (selstate->fds[i].revents & POLLIN) + ssflags |= SSF_READ; + if (selstate->fds[i].revents & POLLOUT) + ssflags |= SSF_WRITE; + if (selstate->fds[i].revents & POLLERR) + ssflags |= SSF_EXCEPTION; +#else + if (FD_ISSET(fd, &selstate->rfds)) + ssflags |= SSF_READ; + if (FD_ISSET(fd, &selstate->wfds)) + ssflags |= SSF_WRITE; + if (FD_ISSET(fd, &selstate->xfds)) + ssflags |= SSF_EXCEPTION; +#endif + return ssflags; } static int service_tcp_fd(krb5_context context, struct conn_state *conn, @@ -702,6 +757,7 @@ krb5_data *callback_buffer) { int fd, e; + unsigned int ssflags; struct addrinfo *ai = state->addr; dprint("start_connection(@%p)\ngetting %s socket in family %d...", state, @@ -711,14 +767,6 @@ dprint("socket: %m creating with af %d\n", state->err, ai->ai_family); return -1; /* try other hosts */ } -#ifndef _WIN32 /* On Windows FD_SETSIZE is a count, not a max value. */ - if (fd >= FD_SETSIZE) { - closesocket(fd); - state->err = EMFILE; - dprint("socket: fd %d too high\n", fd); - return -1; - } -#endif set_cloexec_fd(fd); /* Make it non-blocking. */ if (ai->ai_socktype == SOCK_STREAM) { @@ -801,17 +849,16 @@ } } #endif - FD_SET(state->fd, &selstate->rfds); + ssflags = SSF_READ | SSF_EXCEPTION; if (state->state == CONNECTING || state->state == WRITING) - FD_SET(state->fd, &selstate->wfds); - FD_SET(state->fd, &selstate->xfds); - if (selstate->max <= state->fd) - selstate->max = state->fd + 1; - selstate->nfds++; + ssflags |= SSF_WRITE; + if (!cm_add_fd(selstate, state->fd, ssflags)) { + (void) closesocket(state->fd); + state->fd = INVALID_SOCKET; + state->state = FAILED; + return -1; + } - dprint("new select vectors: %F\n", - &selstate->rfds, &selstate->wfds, &selstate->xfds, selstate->max); - return 0; } @@ -868,22 +915,11 @@ kill_conn(struct conn_state *conn, struct select_state *selstate, int err) { conn->state = FAILED; + conn->err = err; shutdown(conn->fd, SHUTDOWN_BOTH); - FD_CLR(conn->fd, &selstate->rfds); - FD_CLR(conn->fd, &selstate->wfds); - FD_CLR(conn->fd, &selstate->xfds); - conn->err = err; + cm_remove_fd(selstate, conn->fd); dprint("abandoning connection %d: %m\n", conn->fd, err); /* Fix up max fd for next select call. */ - if (selstate->max == 1 + conn->fd) { - while (selstate->max > 0 - && ! FD_ISSET(selstate->max-1, &selstate->rfds) - && ! FD_ISSET(selstate->max-1, &selstate->wfds) - && ! FD_ISSET(selstate->max-1, &selstate->xfds)) - selstate->max--; - dprint("new max_fd + 1 is %d\n", selstate->max); - } - selstate->nfds--; } /* Check socket for error. */ @@ -1005,7 +1041,7 @@ /* Done writing, switch to reading. */ /* Don't call shutdown at this point because * some implementations cannot deal with half-closed connections.*/ - FD_CLR(conn->fd, &selstate->wfds); + cm_unset_write(selstate, conn->fd); /* Q: How do we detect failures to send the remaining data to the remote side, since we're in non-blocking mode? Will we always get errors on the reading side? */ @@ -1125,7 +1161,8 @@ while (selstate->nfds > 0) { unsigned int i; - e = krb5int_cm_call_select(selstate, seltemp, &selret); + selret = 0; + e = cm_select_or_poll(selstate, seltemp, &selret); if (e == EINTR) continue; if (e != 0) @@ -1149,18 +1149,12 @@ service_fds (krb5_context context, return 0; /* Got something on a socket, process it. */ - for (i = 0; i <= (unsigned int)selstate->max && selret > 0 && i < n_conns; i++) { + for (i = 0; i < n_conns; i++) { int ssflags; if (conns[i].fd == INVALID_SOCKET) continue; - ssflags = 0; - if (FD_ISSET(conns[i].fd, &seltemp->rfds)) - ssflags |= SSF_READ, selret--; - if (FD_ISSET(conns[i].fd, &seltemp->wfds)) - ssflags |= SSF_WRITE, selret--; - if (FD_ISSET(conns[i].fd, &seltemp->xfds)) - ssflags |= SSF_EXCEPTION, selret--; + ssflags = cm_get_ssflags(seltemp, conns[i].fd); if (!ssflags) continue; @@ -1229,12 +1259,7 @@ retval = ENOMEM; goto egress; } - sel_state->max = 0; - sel_state->nfds = 0; - sel_state->end_time.tv_sec = sel_state->end_time.tv_usec = 0; - FD_ZERO(&sel_state->rfds); - FD_ZERO(&sel_state->wfds); - FD_ZERO(&sel_state->xfds); + cm_init_selstate(sel_state); /* Set up connections. */ @@ -1295,7 +1295,7 @@ krb5int_sendto (krb5_context context, co (callback_info ? &callback_data[host] : NULL))) continue; - retval = getcurtime(&now); + retval = k5_getcurtime(&now); if (retval) goto egress; sel_state->end_time = now; @@ -1314,7 +1314,7 @@ krb5int_sendto (krb5_context context, co } if (e) break; - retval = getcurtime(&now); + retval = k5_getcurtime(&now); if (retval) goto egress; /* Possible optimization: Find a way to integrate this select