From dc0208b08c7df7c0e559b021bdffcebdb2dc221e Mon Sep 17 00:00:00 2001 From: Nate Straz Date: Tue, 13 Sep 2005 19:47:14 +0000 Subject: Merge qarsh revisions 1727:1738 from sistina-test branch djansa-qarsh. --- qarsh.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 81 insertions(+), 8 deletions(-) (limited to 'qarsh.c') diff --git a/qarsh.c b/qarsh.c index b0c4aab..5521311 100644 --- a/qarsh.c +++ b/qarsh.c @@ -21,22 +21,34 @@ #include #include - #include "sockutil.h" #include "qarsh_packet.h" +#include "btime.h" #define QARSH_MINPORT 5010 /* Globals */ int qarsh_fd = -1; /* The control connection to qarshd */ +char *qarshd_host; /* hostname of remote host */ +int qarsh_allowed_hbeatmisses = 100; int signal_to_send = 0; int sigs_to_propogate[] = { SIGINT, SIGTERM, SIGHUP, SIGUSR1, SIGUSR2 }; sigset_t pselect_sigmask; void -usage() +usage(const char *pname) { - printf("qarsh: [user[.group]@]hostname cmdline ...\n"); + fprintf(stderr, "%s [options] [user[.group]@]hostname cmdline ...\n" + "-l user Run cmdline using this user name.\n" + "-g group Run cmdline using this group name.\n" + "-p port Use this port to contact qarshd.\n" + "-m misscount Number of missed heartbeats allowed.\n" + " A value of 0 disables heartbeats.\n" + " Default is value is <%d>.\n" + + ,pname, qarsh_allowed_hbeatmisses); + + return; } char * @@ -110,6 +122,29 @@ set_remote_user(char *user, char *group) } } +unsigned int +heartbeat(const char *host) +{ + int retry; + unsigned int hbeat; + + /* User disabled heart beating */ + if (qarsh_allowed_hbeatmisses == 0) { + return 1; + } + + for (retry = 0; retry < qarsh_allowed_hbeatmisses; retry++) { + if ((hbeat = btime(host)) == 0) { + fprintf(stderr, "qarsh: INFO -- missed heartbeat %d\n", retry); + sleep(retry); + } else { + break; + } + } + + return hbeat; +} + int run_remote_cmd(char *cmdline) { @@ -124,6 +159,16 @@ run_remote_cmd(char *cmdline) int nset; struct sockaddr_in caddr; socklen_t clen; + struct timespec timeout; + unsigned int start_hbeat; + unsigned int hbeat; + + /* Use remote node boot time as hearbeat */ + start_hbeat = heartbeat(qarshd_host); + if (!start_hbeat) { + fprintf(stderr, "Can not initialize heartbeat from %s\n", qarshd_host); + return 1; + } l_in = bind_any(QARSH_MINPORT); p_in = getsockport(l_in); @@ -203,11 +248,28 @@ run_remote_cmd(char *cmdline) memset(buf, 0, 1024); for (;;) { + timeout.tv_sec = 5; + timeout.tv_nsec = 0; testfds = readfds; memset(buf, 0, 1024); - nset = pselect(FD_SETSIZE, &testfds, NULL, NULL, NULL, + nset = pselect(FD_SETSIZE, &testfds, NULL, NULL, &timeout, &pselect_sigmask); + + if (nset == 0) { + hbeat = heartbeat(qarshd_host); + if ((!hbeat) || + (abs(hbeat - start_hbeat)) > 5) { + fprintf(stderr, "No heartbeat from %s\n", qarshd_host); + /* Set our return packet as NULL so we exit + * with unknown error. */ + qp = NULL; + break; + } + + continue; + } + if (nset == -1 && errno == EINTR) { /* Only test signals */ if (signal_to_send) { @@ -253,6 +315,10 @@ run_remote_cmd(char *cmdline) } if (nset && FD_ISSET(qarsh_fd, &testfds)) { qp = recv_packet(qarsh_fd); + if (qp == NULL) { + fprintf(stderr, "recv_packet() returned NULL!\n:"); + break; + } /* dump_qp(qp); */ if (qp && qp->qp_type == QP_CMDEXIT) { @@ -294,7 +360,7 @@ main(int argc, char *argv[]) openlog("qarsh", LOG_PID, LOG_DAEMON); - while ((c = getopt(argc, argv, "+p:l:g:")) != -1) { + while ((c = getopt(argc, argv, "+p:l:g:m:")) != -1) { switch (c) { case 'l': remuser = strdup(optarg); @@ -305,17 +371,21 @@ main(int argc, char *argv[]) case 'p': port = atoi(optarg); break; + case 'm': + qarsh_allowed_hbeatmisses = atoi(optarg); + break; case '?': default: printf("Unknown option %c\n", (char)optopt); - usage(); + usage(argv[0]); exit(1); } } if ((host = argv[optind++]) == NULL) { - usage(); + usage(argv[0]); exit(1); } + /* check for user and group in form [user[.group]@]hostname */ { char *sp; @@ -341,9 +411,11 @@ main(int argc, char *argv[]) argc -= optind; argv += optind; if ((args = copyargs(argv)) == NULL) { - usage(); + usage(argv[0]); exit(1); } + + qarshd_host = strdup(host); qarsh_fd = connect_to_host(host, port); if (qarsh_fd == -1) { if (errno == 0) { @@ -361,5 +433,6 @@ main(int argc, char *argv[]) ret = run_remote_cmd(args); close(qarsh_fd); free(args); + free(qarshd_host); return ret; } -- cgit