summaryrefslogtreecommitdiffstats
path: root/scripts/kprobes_test/monitor_system.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/kprobes_test/monitor_system.py')
-rwxr-xr-xscripts/kprobes_test/monitor_system.py82
1 files changed, 82 insertions, 0 deletions
diff --git a/scripts/kprobes_test/monitor_system.py b/scripts/kprobes_test/monitor_system.py
new file mode 100755
index 00000000..59d49e7d
--- /dev/null
+++ b/scripts/kprobes_test/monitor_system.py
@@ -0,0 +1,82 @@
+#!/usr/bin/python
+
+# Copyright (C) 2008 Red Hat Inc.
+#
+# This file is part of systemtap, and is free software. You can
+# redistribute it and/or modify it under the terms of the GNU General
+# Public License (GPL); either version 2, or (at your option) any
+# later version.
+
+# This script monitors a remote system that is running the kprobes
+# test. If several consecutive 'ping's fail, the system is rebooted.
+#
+# This script takes as an argument a config filename, whose contents
+# should look like the following:
+#
+# config_opts['system_name'] = "SYSTEM_NAME"
+# config_opts['restart_cmds'] = [
+# 'CMD1',
+# 'CMD2',
+# ]
+#
+# As an example, here is a config file used when monitoring a kvm
+# instance:
+#
+# config_opts['system_name'] = "dhcp-148"
+# config_opts['restart_cmds'] = [
+# 'sudo virsh destroy kvm-rawhide-64-1',
+# 'sudo virsh start kvm-rawhide-64-1',
+# ]
+
+import sys
+import os
+import time
+
+if len(sys.argv) != 2:
+ print >>sys.stderr, "Usage: %s config_file" % sys.argv[0]
+ sys.exit(1)
+cfg = sys.argv[1]
+
+# Read in the config file
+if not os.path.exists(cfg):
+ print >>sys.stderr, ("Could not find required config file: %s" % cfg)
+ sys.exit(1)
+
+print "Reading config file %s..." % cfg
+config_opts = dict()
+execfile(cfg)
+if not config_opts.has_key('system_name'):
+ print >>sys.stderr, "Missing required config opt 'system_name'"
+ sys.exit(1)
+if not config_opts.has_key('restart_cmds'):
+ print >>sys.stderr, "Missing required config opt 'restart_cmds'"
+ sys.exit(1)
+
+errors = 0
+while 1:
+ rc = os.system("ping -c 1 %s" % config_opts['system_name'])
+ # If ping worked, system is still up and running. Wait a minute
+ # and try again.
+ if os.WEXITSTATUS(rc) == 0:
+ time.sleep(60)
+ errors = 0
+
+ # If the ping failed, increase the error count. If we've got 3
+ # consecutive errors, assume the machine has crashed and restart
+ # it.
+ else:
+ errors += 1
+ if errors < 3:
+ time.sleep(30)
+ else:
+ print >>sys.stderr, "Restarting %s..." % config_opts['system_name']
+ # Run each restart command
+
+ for cmd in config_opts['restart_cmds']:
+ print >>sys.stderr, "Running '%s'..." % cmd
+ os.system(cmd)
+ # Sleep for 5 minutes to give the system a chance to boot
+ print >>sys.stderr, "Sleeping for 5 minutes..."
+ time.sleep(5 * 60)
+ errors = 0
+