From 19af606787586e619eda34a31e5e619f9913405f Mon Sep 17 00:00:00 2001
From: Nathan Straz
Date: Fri, 5 Sep 2014 17:16:34 -0400
Subject: [qarsh] Add recv_packet with selects and hbeat

There was an issue when we rebooted a node via qarsh. The packet size
would make it back from qarshd, but none of the data so we would get
stuck in a read(). We needed to avoid a blocking read() and check for a
heart beat while we wait for the rest of the packet.
---
Review note: this copy of the patch carries a revised recv_packet_wait()
(shared read helper, EOF and select() error handling), so the index line
below no longer describes the resulting blob.  The header names on the
context #include lines were already missing and are left as found.

 qarsh.c | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 91 insertions(+), 1 deletion(-)

diff --git a/qarsh.c b/qarsh.c
index 1f5e1db..372cedf 100644
--- a/qarsh.c
+++ b/qarsh.c
@@ -38,6 +38,7 @@
 #include
 #include
 #include
+#include
 
 #include "sockutil.h"
 #include "qarsh_packet.h"
@@ -111,6 +112,94 @@ copyargs(char **argv)
 	return args;
 }
 
+/*
+ * Read exactly len bytes from fd into dst, using select() with a one
+ * second timeout so we never sit in a blocking read().  Every time
+ * select() times out hbeat(hbh) is called, and we give up if it
+ * returns zero.  "what" names the item being read in log messages.
+ *
+ * Returns 0 once len bytes have been read, or -1 if the heartbeat
+ * check fails, the peer closes the connection, or select()/read()
+ * fails with anything other than EINTR.
+ */
+static int
+read_full_wait(int fd, hbeat_t hbh, char *dst, size_t len, const char *what)
+{
+	fd_set rfds;
+	struct timeval timeout;
+	size_t used = 0;
+	ssize_t ret;
+	int nset;
+
+	while (used < len) {
+		/* select() may modify both arguments, so reset them each pass */
+		FD_ZERO(&rfds);
+		FD_SET(fd, &rfds);
+		timeout.tv_sec = 1;
+		timeout.tv_usec = 0;
+
+		nset = select(fd+1, &rfds, NULL, NULL, &timeout);
+		if (nset < 0) {
+			if (errno == EINTR) continue;
+			lprintf(LOG_ERR, "Select error while reading %s: %s\n", what, strerror(errno));
+			return -1;
+		}
+		if (nset == 0) {
+			if (!hbeat(hbh)) return -1;
+			continue;
+		}
+		if (!FD_ISSET(fd, &rfds)) continue;
+
+		ret = read(fd, dst + used, len - used);
+		if (ret < 0) {
+			if (errno == EINTR) continue;
+			lprintf(LOG_ERR, "Read error while reading %s: %s\n", what, strerror(errno));
+			return -1;
+		}
+		if (ret == 0) {
+			/*
+			 * EOF: the fd stays readable forever, so bail out
+			 * instead of spinning on select()/read().
+			 */
+			return -1;
+		}
+		used += (size_t)ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Use select to wait for the entire packet and check heartbeat while we wait
+ *
+ * Reads a 4 byte network-order length followed by that many bytes of
+ * packet data, then returns the result of parse_packet() on that data.
+ * Returns NULL if either read fails or the length exceeds
+ * QARSH_MAX_PACKET_SIZE.
+ */
+struct qa_packet *
+recv_packet_wait(int fd, hbeat_t hbh)
+{
+	char buf[QARSH_MAX_PACKET_SIZE];
+	uint32_t packetsize;
+
+	if (read_full_wait(fd, hbh, (char *)&packetsize, sizeof packetsize, "packet size") < 0) {
+		return NULL;
+	}
+
+	packetsize = ntohl(packetsize);
+	if (packetsize > QARSH_MAX_PACKET_SIZE) {
+		lprintf(LOG_ERR, "Packet size too large, %u > %d\n", (unsigned)packetsize, QARSH_MAX_PACKET_SIZE);
+		return NULL;
+	}
+
+	/* Keep reading until we get the whole packet and nothing but the packet, so help me socket */
+	if (read_full_wait(fd, hbh, buf, packetsize, "packet data") < 0) {
+		return NULL;
+	}
+
+	return parse_packet(buf, packetsize);
+}
+
 void
 sig_handler(int sig)
 {
@@ -360,7 +449,8 @@ run_remote_cmd(char *cmdline)
 				nset--;
 			}
 			if (nset && FD_ISSET(qarsh_fd, &rfds)) {
-				qp = recv_packet(qarsh_fd);
+				qp = recv_packet_wait(qarsh_fd, qarsh_hb);
+
 				if (qp == NULL) {
 					fprintf(stderr, "recv_packet() returned NULL!\n");
 					break;
-- 
cgit