diff options
author | David Teigland <teigland@redhat.com> | 2009-06-03 16:09:12 -0500 |
---|---|---|
committer | David Teigland <teigland@redhat.com> | 2009-06-03 16:09:12 -0500 |
commit | ac2d3c82e00ba073006b8b406c15d02c5390d6fc (patch) | |
tree | 0d8601d825f743b0b5ee610d2e1065fb959858c9 | |
parent | ef5e1222b90ea3625a4a1ce87a9b35e4ffacdc69 (diff) | |
download | dct-stuff-ac2d3c82e00ba073006b8b406c15d02c5390d6fc.tar.gz dct-stuff-ac2d3c82e00ba073006b8b406c15d02c5390d6fc.tar.xz dct-stuff-ac2d3c82e00ba073006b8b406c15d02c5390d6fc.zip |
cpgx: improve killing and restarting
of corosync or aisexec, and optionally restart by starting the exec
directly instead of via cman_tool
Signed-off-by: David Teigland <teigland@redhat.com>
-rw-r--r-- | cpgx/cpgx.c | 53 |
1 files changed, 44 insertions, 9 deletions
diff --git a/cpgx/cpgx.c b/cpgx/cpgx.c index 683a431..23748b7 100644 --- a/cpgx/cpgx.c +++ b/cpgx/cpgx.c @@ -32,6 +32,7 @@ #include <time.h> #include <errno.h> #include <signal.h> +#include <syslog.h> #include <sys/poll.h> #include <sys/time.h> #include <sys/wait.h> @@ -45,6 +46,12 @@ #include "list.h" +#ifdef WHITETANK +static char *exec_name = "aisexec"; +#else +static char *exec_name = "corosync"; +#endif + #define CLIENT_NALLOC 2 #define MAX_NODES 8 /* not easily changed */ #define DUMP_SIZE (1024 * 1024) @@ -131,6 +138,7 @@ static cpg_handle_t dct_cpg_handle; static int dct_cpg_client; static int dct_cpg_fd; +static int exec_join = 0; static int prog_quit; static int cluster_down; static int opt_leave = 1; @@ -1487,6 +1495,34 @@ int iterations_done(void) return 0; } +void restart_cluster(void) +{ + syslog(LOG_WARNING, "%ld killing %s", time(NULL), exec_name); + log_debug("killing %s", exec_name); + + if (exec_name[0] == 'a') + system("killall -9 aisexec"); + else + system("killall -9 corosync"); + + /* others should see us fail before we rejoin, not sure 10s will + be enough for some people */ + + sleep(10); + + syslog(LOG_WARNING, "%ld starting %s", time(NULL), exec_name); + log_debug("starting %s", exec_name); + + if (!exec_join) { + system("cman_tool join -w"); + } else { + if (exec_name[0] == 'a') + system("aisexec"); + else + system("corosync"); + } +} + void loop(void) { void (*workfn) (int ci); @@ -1592,13 +1628,7 @@ void loop(void) if (we_should_die()) { fflush(stdout); fflush(stderr); - log_debug("killall -9 corosync/aisexec"); - system("killall -9 corosync"); - system("killall -9 aisexec"); - /* others should see us fail before we rejoin */ - sleep(10); - log_debug("cman_tool join -w"); - system("cman_tool join -w"); + restart_cluster(); exit(2); } } @@ -1614,11 +1644,12 @@ void print_usage(void) printf(" -e [0|1] exit included in test [off|on], default 1\n"); printf(" (program exits without leaving cpg then rejoins)\n"); printf(" -d [0|1] die 
included in test [off|on], default 0\n"); - printf(" (kills corosync/aisexec, restarts with cman_tool join)\n"); + printf(" (program kills and restarts %s)\n", exec_name); printf(" -s <num> sync up to num events, default %d\n", DEFAULT_SYNC_MAX); printf(" -t <sec> timeout after no dispatch for this many seconds, default 0 (never)\n"); printf(" -i <sec> run for this many seconds, default 0 (forever)\n"); + printf(" -p run \"%s\" to join with -d rather than cman_tool\n", exec_name); printf(" -c continue after error\n"); printf(" -V print version\n"); printf("\n"); @@ -1642,7 +1673,7 @@ int main(int argc, char **argv) int optchar; while (cont) { - optchar = getopt(argc, argv, "H:D:l:e:d:s:t:i:chV"); + optchar = getopt(argc, argv, "H:D:l:e:d:s:t:i:pchV"); switch (optchar) { case 'H': @@ -1677,6 +1708,10 @@ int main(int argc, char **argv) iterations_sec = atoi(optarg); break; + case 'p': + exec_join = 1; + break; + case 'c': continue_after_error = 1; break; |