From e50e9b44ab2b8b1184f93d24734af4b5862777bf Mon Sep 17 00:00:00 2001 From: Josh Kearney Date: Thu, 13 Oct 2011 13:14:57 -0500 Subject: Adds the ability to automatically issue a hard reboot to instances that have been stuck in a 'rebooting' state for longer than a specified window. Fixes bug 873099. Change-Id: Ife2c64326fdb3ec849242583d1bd1d96f9f4be0f --- nova/compute/manager.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'nova/compute') diff --git a/nova/compute/manager.py b/nova/compute/manager.py index a10cb1bd6..708920c6a 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -73,6 +73,10 @@ flags.DEFINE_string('console_host', socket.gethostname(), flags.DEFINE_integer('live_migration_retry_count', 30, "Retry count needed in live_migration." " sleep 1 sec for each count") +flags.DEFINE_integer("reboot_timeout", 0, + "Automatically hard reboot an instance if it has been " + "stuck in a rebooting state longer than N seconds." + " Set to 0 to disable.") flags.DEFINE_integer("rescue_timeout", 0, "Automatically unrescue an instance after N seconds." " Set to 0 to disable.") @@ -1784,6 +1788,14 @@ class ComputeManager(manager.SchedulerDependentManager): if error_list is None: error_list = [] + try: + if FLAGS.reboot_timeout > 0: + self.driver.poll_rebooting_instances(FLAGS.reboot_timeout) + except Exception as ex: + LOG.warning(_("Error during poll_rebooting_instances: %s"), + unicode(ex)) + error_list.append(ex) + try: if FLAGS.rescue_timeout > 0: self.driver.poll_rescued_instances(FLAGS.rescue_timeout) -- cgit