summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Teigland <teigland@redhat.com>2009-06-03 16:09:12 -0500
committerDavid Teigland <teigland@redhat.com>2009-06-03 16:09:12 -0500
commitac2d3c82e00ba073006b8b406c15d02c5390d6fc (patch)
tree0d8601d825f743b0b5ee610d2e1065fb959858c9
parentef5e1222b90ea3625a4a1ce87a9b35e4ffacdc69 (diff)
downloaddct-stuff-ac2d3c82e00ba073006b8b406c15d02c5390d6fc.tar.gz
dct-stuff-ac2d3c82e00ba073006b8b406c15d02c5390d6fc.tar.xz
dct-stuff-ac2d3c82e00ba073006b8b406c15d02c5390d6fc.zip
cpgx: improve killing and restarting
of corosync or aisexec, and optionally restart by starting the exec directly instead of via cman_tool.

Signed-off-by: David Teigland <teigland@redhat.com>
-rw-r--r--cpgx/cpgx.c53
1 files changed, 44 insertions, 9 deletions
diff --git a/cpgx/cpgx.c b/cpgx/cpgx.c
index 683a431..23748b7 100644
--- a/cpgx/cpgx.c
+++ b/cpgx/cpgx.c
@@ -32,6 +32,7 @@
#include <time.h>
#include <errno.h>
#include <signal.h>
+#include <syslog.h>
#include <sys/poll.h>
#include <sys/time.h>
#include <sys/wait.h>
@@ -45,6 +46,12 @@
#include "list.h"
+/* Name of the cluster daemon that the -d test kills and restarts; it is also
+   printed in the usage text.  Builds with WHITETANK defined use aisexec,
+   all others use corosync.  Points at a string literal, hence const. */
+#ifdef WHITETANK
+static const char *exec_name = "aisexec";
+#else
+static const char *exec_name = "corosync";
+#endif
+
#define CLIENT_NALLOC 2
#define MAX_NODES 8 /* not easily changed */
#define DUMP_SIZE (1024 * 1024)
@@ -131,6 +138,7 @@ static cpg_handle_t dct_cpg_handle;
static int dct_cpg_client;
static int dct_cpg_fd;
+static int exec_join = 0;
static int prog_quit;
static int cluster_down;
static int opt_leave = 1;
@@ -1487,6 +1495,34 @@ int iterations_done(void)
return 0;
}
+/* Run a shell command with system() and record a non-zero result in the
+   debug log instead of discarding it.  Failure is only reported; the caller
+   carries on either way. */
+static void restart_run_cmd(const char *cmd)
+{
+	int rv = system(cmd);
+
+	if (rv)
+		log_debug("\"%s\" returned %d", cmd, rv);
+}
+
+/* Kill the local cluster daemon named by exec_name with "killall -9", sleep
+   for 10 seconds, then start it again: with "cman_tool join -w" by default,
+   or by running the daemon binary directly when exec_join is set (-p).
+   Both steps are recorded in syslog, stamped with time(NULL), and in the
+   debug log. */
+void restart_cluster(void)
+{
+	/* exec_name is either "aisexec" or "corosync", so its first character
+	   is enough to tell the two apart */
+	int is_aisexec = (exec_name[0] == 'a');
+
+	/* time_t is not guaranteed to be long; cast so the argument really
+	   matches the %ld conversion */
+	syslog(LOG_WARNING, "%ld killing %s", (long)time(NULL), exec_name);
+	log_debug("killing %s", exec_name);
+
+	if (is_aisexec)
+		restart_run_cmd("killall -9 aisexec");
+	else
+		restart_run_cmd("killall -9 corosync");
+
+	/* others should see us fail before we rejoin, not sure 10s will
+	   be enough for some people */
+
+	sleep(10);
+
+	syslog(LOG_WARNING, "%ld starting %s", (long)time(NULL), exec_name);
+	log_debug("starting %s", exec_name);
+
+	if (!exec_join)
+		restart_run_cmd("cman_tool join -w");
+	else if (is_aisexec)
+		restart_run_cmd("aisexec");
+	else
+		restart_run_cmd("corosync");
+}
+
void loop(void)
{
void (*workfn) (int ci);
@@ -1592,13 +1628,7 @@ void loop(void)
if (we_should_die()) {
fflush(stdout);
fflush(stderr);
- log_debug("killall -9 corosync/aisexec");
- system("killall -9 corosync");
- system("killall -9 aisexec");
- /* others should see us fail before we rejoin */
- sleep(10);
- log_debug("cman_tool join -w");
- system("cman_tool join -w");
+ restart_cluster();
exit(2);
}
}
@@ -1614,11 +1644,12 @@ void print_usage(void)
printf(" -e [0|1] exit included in test [off|on], default 1\n");
printf(" (program exits without leaving cpg then rejoins)\n");
printf(" -d [0|1] die included in test [off|on], default 0\n");
- printf(" (kills corosync/aisexec, restarts with cman_tool join)\n");
+ printf(" (program kills and restarts %s)\n", exec_name);
printf(" -s <num> sync up to num events, default %d\n",
DEFAULT_SYNC_MAX);
printf(" -t <sec> timeout after no dispatch for this many seconds, default 0 (never)\n");
printf(" -i <sec> run for this many seconds, default 0 (forever)\n");
+ printf(" -p run \"%s\" to join with -d rather than cman_tool\n", exec_name);
printf(" -c continue after error\n");
printf(" -V print version\n");
printf("\n");
@@ -1642,7 +1673,7 @@ int main(int argc, char **argv)
int optchar;
while (cont) {
- optchar = getopt(argc, argv, "H:D:l:e:d:s:t:i:chV");
+ optchar = getopt(argc, argv, "H:D:l:e:d:s:t:i:pchV");
switch (optchar) {
case 'H':
@@ -1677,6 +1708,10 @@ int main(int argc, char **argv)
iterations_sec = atoi(optarg);
break;
+ case 'p':
+ exec_join = 1;
+ break;
+
case 'c':
continue_after_error = 1;
break;