summary refs log tree commit diff stats
path: root/qarsh.c
diff options
context:
space:
mode:
Diffstat (limited to 'qarsh.c')
-rw-r--r-- qarsh.c 89
1 files changed, 81 insertions, 8 deletions
diff --git a/qarsh.c b/qarsh.c
index b0c4aab..5521311 100644
--- a/qarsh.c
+++ b/qarsh.c
@@ -21,22 +21,34 @@
#include <syslog.h>
#include <pwd.h>
-
#include "sockutil.h"
#include "qarsh_packet.h"
+#include "btime.h"
#define QARSH_MINPORT 5010
/* Globals */
int qarsh_fd = -1; /* The control connection to qarshd */
+char *qarshd_host; /* hostname of remote host */
+int qarsh_allowed_hbeatmisses = 100;
int signal_to_send = 0;
int sigs_to_propogate[] = { SIGINT, SIGTERM, SIGHUP, SIGUSR1, SIGUSR2 };
sigset_t pselect_sigmask;
+/*
+ * usage -- print the command-line synopsis and option summary to stderr.
+ *
+ * pname is the program name printed at the start of the synopsis.  The
+ * value of qarsh_allowed_hbeatmisses at the time of the call is printed
+ * as the default for -m.
+ */
void
-usage()
+usage(const char *pname)
{
- printf("qarsh: [user[.group]@]hostname cmdline ...\n");
+	fprintf(stderr, "%s [options] [user[.group]@]hostname cmdline ...\n"
+		"-l user Run cmdline using this user name.\n"
+		"-g group Run cmdline using this group name.\n"
+		"-p port Use this port to contact qarshd.\n"
+		"-m misscount Number of missed heartbeats allowed.\n"
+		" A value of 0 disables heartbeats.\n"
+		" Default value is <%d>.\n"
+
+		,pname, qarsh_allowed_hbeatmisses);
+
+	return;
}
char *
@@ -110,6 +122,29 @@ set_remote_user(char *user, char *group)
}
}
+/*
+ * heartbeat -- probe the remote host and return its heartbeat value.
+ *
+ * Calls btime(host) up to qarsh_allowed_hbeatmisses times and stops at
+ * the first non-zero reply.  After each miss it logs the attempt number
+ * to stderr and sleeps `retry` seconds, so the wait grows with every
+ * failed attempt (the first miss does not sleep at all).
+ *
+ * Returns the non-zero value reported by btime(), 1 when heartbeats are
+ * disabled (qarsh_allowed_hbeatmisses == 0), or 0 when no attempt
+ * succeeded.
+ */
+unsigned int
+heartbeat(const char *host)
+{
+	int retry;
+	/* Start at 0 ("no heartbeat") so that a negative miss count, which
+	 * skips the loop entirely, cannot return an uninitialized value. */
+	unsigned int hbeat = 0;
+
+	/* User disabled heart beating */
+	if (qarsh_allowed_hbeatmisses == 0) {
+		return 1;
+	}
+
+	for (retry = 0; retry < qarsh_allowed_hbeatmisses; retry++) {
+		if ((hbeat = btime(host)) == 0) {
+			fprintf(stderr, "qarsh: INFO -- missed heartbeat %d\n", retry);
+			sleep(retry);
+		} else {
+			break;
+		}
+	}
+
+	return hbeat;
+}
+
int
run_remote_cmd(char *cmdline)
{
@@ -124,6 +159,16 @@ run_remote_cmd(char *cmdline)
int nset;
struct sockaddr_in caddr;
socklen_t clen;
+ struct timespec timeout;
+ unsigned int start_hbeat;
+ unsigned int hbeat;
+
+ /* Use remote node boot time as heartbeat */
+ start_hbeat = heartbeat(qarshd_host);
+ if (!start_hbeat) {
+ fprintf(stderr, "Can not initialize heartbeat from %s\n", qarshd_host);
+ return 1;
+ }
l_in = bind_any(QARSH_MINPORT);
p_in = getsockport(l_in);
@@ -203,11 +248,28 @@ run_remote_cmd(char *cmdline)
memset(buf, 0, 1024);
for (;;) {
+ timeout.tv_sec = 5;
+ timeout.tv_nsec = 0;
testfds = readfds;
memset(buf, 0, 1024);
- nset = pselect(FD_SETSIZE, &testfds, NULL, NULL, NULL,
+ nset = pselect(FD_SETSIZE, &testfds, NULL, NULL, &timeout,
&pselect_sigmask);
+
+ if (nset == 0) {
+ hbeat = heartbeat(qarshd_host);
+ if ((!hbeat) ||
+ (abs(hbeat - start_hbeat)) > 5) {
+ fprintf(stderr, "No heartbeat from %s\n", qarshd_host);
+ /* Set our return packet as NULL so we exit
+ * with unknown error. */
+ qp = NULL;
+ break;
+ }
+
+ continue;
+ }
+
if (nset == -1 && errno == EINTR) {
/* Only test signals */
if (signal_to_send) {
@@ -253,6 +315,10 @@ run_remote_cmd(char *cmdline)
}
if (nset && FD_ISSET(qarsh_fd, &testfds)) {
qp = recv_packet(qarsh_fd);
+ if (qp == NULL) {
+ fprintf(stderr, "recv_packet() returned NULL!\n:");
+ break;
+ }
/* dump_qp(qp); */
if (qp && qp->qp_type == QP_CMDEXIT) {
@@ -294,7 +360,7 @@ main(int argc, char *argv[])
openlog("qarsh", LOG_PID, LOG_DAEMON);
- while ((c = getopt(argc, argv, "+p:l:g:")) != -1) {
+ while ((c = getopt(argc, argv, "+p:l:g:m:")) != -1) {
switch (c) {
case 'l':
remuser = strdup(optarg);
@@ -305,17 +371,21 @@ main(int argc, char *argv[])
case 'p':
port = atoi(optarg);
break;
+ case 'm':
+ qarsh_allowed_hbeatmisses = atoi(optarg);
+ break;
case '?':
default:
printf("Unknown option %c\n", (char)optopt);
- usage();
+ usage(argv[0]);
exit(1);
} }
if ((host = argv[optind++]) == NULL) {
- usage();
+ usage(argv[0]);
exit(1);
}
+
/* check for user and group in form [user[.group]@]hostname */
{
char *sp;
@@ -341,9 +411,11 @@ main(int argc, char *argv[])
argc -= optind;
argv += optind;
if ((args = copyargs(argv)) == NULL) {
- usage();
+ usage(argv[0]);
exit(1);
}
+
+ qarshd_host = strdup(host);
qarsh_fd = connect_to_host(host, port);
if (qarsh_fd == -1) {
if (errno == 0) {
@@ -361,5 +433,6 @@ main(int argc, char *argv[])
ret = run_remote_cmd(args);
close(qarsh_fd);
free(args);
+ free(qarshd_host);
return ret;
}