summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNathan Straz <nstraz@redhat.com>2014-09-05 17:16:34 -0400
committerNathan Straz <nstraz@redhat.com>2014-09-05 17:16:34 -0400
commit19af606787586e619eda34a31e5e619f9913405f (patch)
tree83f8da876d8e14fd4dd4eaac7ad80b23dfe1a27d
parent255c5cace1570e78210dda64992a2853c73ac490 (diff)
downloadqarsh-19af606787586e619eda34a31e5e619f9913405f.zip
qarsh-19af606787586e619eda34a31e5e619f9913405f.tar.gz
qarsh-19af606787586e619eda34a31e5e619f9913405f.tar.xz
[qarsh] Add recv_packet with selects and hbeat
There was an issue when we rebooted a node via qarsh: the packet size would make it back from qarshd, but none of the packet data would, so we got stuck in a blocking read(). To avoid that, recv_packet_wait() uses select() instead of a blocking read() and checks the heartbeat while it waits for the rest of the packet.
-rw-r--r--qarsh.c68
1 files changed, 67 insertions, 1 deletions
diff --git a/qarsh.c b/qarsh.c
index 1f5e1db..372cedf 100644
--- a/qarsh.c
+++ b/qarsh.c
@@ -38,6 +38,7 @@
#include <pwd.h>
#include <time.h>
#include <stdarg.h>
+#include <syslog.h>
#include "sockutil.h"
#include "qarsh_packet.h"
@@ -111,6 +112,70 @@ copyargs(char **argv)
return args;
}
+/*
+ * Read exactly len bytes from fd into dst, waiting with select() in
+ * one-second slices instead of blocking in read().
+ *
+ * Whenever a slice expires with no data, hbeat(hbh) is consulted and a
+ * false result aborts the wait (presumably that means the peer is no
+ * longer alive -- confirm against the hbeat implementation).
+ *
+ * "what" names the thing being read and is only used in log messages.
+ *
+ * Returns 0 once len bytes have been stored, or -1 on heartbeat failure,
+ * end-of-file, or a select()/read() error other than EINTR.
+ */
+static int
+read_full_wait(int fd, hbeat_t hbh, void *dst, size_t len, const char *what)
+{
+    size_t got = 0;
+
+    while (got < len) {
+        fd_set rfds;
+        struct timeval timeout;
+        int nset;
+        ssize_t ret;
+
+        /* select() may modify both the set and the timeout; rebuild them. */
+        FD_ZERO(&rfds);
+        FD_SET(fd, &rfds);
+        timeout.tv_sec = 1;
+        timeout.tv_usec = 0;
+
+        nset = select(fd + 1, &rfds, NULL, NULL, &timeout);
+        if (nset < 0) {
+            if (errno == EINTR) continue;
+            /* A persistent error (e.g. EBADF) would otherwise loop forever. */
+            lprintf(LOG_ERR, "select error while waiting for %s: %s\n", what, strerror(errno));
+            return -1;
+        }
+        if (nset == 0) {
+            /* Timed out: only keep waiting while the heartbeat check passes. */
+            if (!hbeat(hbh)) return -1;
+            continue;
+        }
+        if (!FD_ISSET(fd, &rfds)) continue;
+
+        ret = read(fd, (char *)dst + got, len - got);
+        if (ret < 0) {
+            if (errno == EINTR) continue;
+            lprintf(LOG_ERR, "Read error while reading %s: %s\n", what, strerror(errno));
+            return -1;
+        }
+        if (ret == 0) {
+            /*
+             * End of file.  The fd stays "readable" forever after EOF, so
+             * without this check the loop would spin without ever reaching
+             * the heartbeat test.
+             */
+            return -1;
+        }
+        got += (size_t)ret;
+    }
+
+    return 0;
+}
+
+/*
+ * Receive one packet from fd: a 4-byte length in network byte order
+ * followed by that many bytes of packet data.
+ *
+ * Uses select() so we never sit in a blocking read(), and checks the
+ * heartbeat hbh while waiting for the rest of the packet.
+ *
+ * Returns the result of parse_packet() on the received bytes, or NULL if
+ * the heartbeat check fails, the connection reaches end-of-file, an I/O
+ * error occurs, or the announced size exceeds QARSH_MAX_PACKET_SIZE.
+ */
+struct qa_packet *
+recv_packet_wait(int fd, hbeat_t hbh)
+{
+    char buf[QARSH_MAX_PACKET_SIZE];
+    uint32_t packetsize;
+
+    if (read_full_wait(fd, hbh, &packetsize, sizeof packetsize, "packet size") < 0) {
+        return NULL;
+    }
+
+    packetsize = ntohl(packetsize);
+    if (packetsize > QARSH_MAX_PACKET_SIZE) {
+        lprintf(LOG_ERR, "Packet size too large, %u > %d\n",
+                (unsigned int)packetsize, (int)QARSH_MAX_PACKET_SIZE);
+        return NULL;
+    }
+
+    /* Keep reading until we get the whole packet and nothing but the packet, so help me socket */
+    if (read_full_wait(fd, hbh, buf, packetsize, "packet data") < 0) {
+        return NULL;
+    }
+
+    return parse_packet(buf, packetsize);
+}
+
void
sig_handler(int sig)
{
@@ -360,7 +425,8 @@ run_remote_cmd(char *cmdline)
nset--;
}
if (nset && FD_ISSET(qarsh_fd, &rfds)) {
- qp = recv_packet(qarsh_fd);
+ qp = recv_packet_wait(qarsh_fd, qarsh_hb);
+
if (qp == NULL) {
fprintf(stderr, "recv_packet() returned NULL!\n");
break;