From e50e9b44ab2b8b1184f93d24734af4b5862777bf Mon Sep 17 00:00:00 2001
From: Josh Kearney
Date: Thu, 13 Oct 2011 13:14:57 -0500
Subject: Adds the ability to automatically issue a hard reboot to instances that have been stuck in a 'rebooting' state for longer than a specified window.

Fixes bug 873099.

Change-Id: Ife2c64326fdb3ec849242583d1bd1d96f9f4be0f
---
 nova/virt/driver.py             |  5 +++++
 nova/virt/fake.py               |  3 +++
 nova/virt/hyperv.py             |  6 ++++++
 nova/virt/libvirt/connection.py |  4 ++++
 nova/virt/xenapi/vm_utils.py    |  2 +-
 nova/virt/xenapi/vmops.py       | 20 ++++++++++++++++++++
 nova/virt/xenapi_conn.py        |  4 ++++
 7 files changed, 43 insertions(+), 1 deletion(-)

(limited to 'nova/virt')

diff --git a/nova/virt/driver.py b/nova/virt/driver.py
index 3e57980f3..88a239002 100644
--- a/nova/virt/driver.py
+++ b/nova/virt/driver.py
@@ -485,6 +485,11 @@ class ComputeDriver(object):
         # TODO(Vek): Need to pass context in for access to auth_token
         pass
 
+    def poll_rebooting_instances(self, timeout):
+        """Poll for rebooting instances"""
+        # TODO(Vek): Need to pass context in for access to auth_token
+        raise NotImplementedError()
+
     def poll_rescued_instances(self, timeout):
         """Poll for rescued instances"""
         # TODO(Vek): Need to pass context in for access to auth_token
diff --git a/nova/virt/fake.py b/nova/virt/fake.py
index 1e07eb928..6b70be2bc 100644
--- a/nova/virt/fake.py
+++ b/nova/virt/fake.py
@@ -131,6 +131,9 @@ class FakeConnection(driver.ComputeDriver):
     def unrescue(self, instance, callback, network_info):
         pass
 
+    def poll_rebooting_instances(self, timeout):
+        pass
+
     def poll_rescued_instances(self, timeout):
         pass
 
diff --git a/nova/virt/hyperv.py b/nova/virt/hyperv.py
index 0d48c3792..16fd94e7f 100644
--- a/nova/virt/hyperv.py
+++ b/nova/virt/hyperv.py
@@ -485,10 +485,16 @@ class HyperVConnection(driver.ComputeDriver):
         if vm is None:
             raise exception.InstanceNotFound(instance_id=instance_name)
 
+    def poll_rebooting_instances(self, timeout):
+        """See xenapi_conn.py implementation."""
+        pass
+
     def poll_rescued_instances(self, timeout):
+        """See xenapi_conn.py implementation."""
         pass
 
     def poll_unconfirmed_resizes(self, resize_confirm_window):
+        """See xenapi_conn.py implementation."""
         pass
 
     def update_available_resource(self, ctxt, host):
diff --git a/nova/virt/libvirt/connection.py b/nova/virt/libvirt/connection.py
index 97f90312b..4d6ecac28 100644
--- a/nova/virt/libvirt/connection.py
+++ b/nova/virt/libvirt/connection.py
@@ -613,6 +613,10 @@ class LibvirtConnection(driver.ComputeDriver):
         os.remove(unrescue_xml_path)
         self.reboot(instance, network_info, xml=unrescue_xml)
 
+    @exception.wrap_exception()
+    def poll_rebooting_instances(self, timeout):
+        pass
+
     @exception.wrap_exception()
     def poll_rescued_instances(self, timeout):
         pass
diff --git a/nova/virt/xenapi/vm_utils.py b/nova/virt/xenapi/vm_utils.py
index 495317228..02c4158e9 100644
--- a/nova/virt/xenapi/vm_utils.py
+++ b/nova/virt/xenapi/vm_utils.py
@@ -713,7 +713,7 @@ class VMHelper(HelperBase):
 
     @classmethod
     def lookup(cls, session, name_label):
-        """Look the instance i up, and returns it if available"""
+        """Look the instance up and return it if available"""
         vm_refs = session.get_xenapi().VM.get_by_name_label(name_label)
         n = len(vm_refs)
         if n == 0:
diff --git a/nova/virt/xenapi/vmops.py b/nova/virt/xenapi/vmops.py
index d539871f1..ee70c4e35 100644
--- a/nova/virt/xenapi/vmops.py
+++ b/nova/virt/xenapi/vmops.py
@@ -1117,6 +1117,26 @@ class VMOps(object):
         vm_ref = self._get_vm_opaque_ref(instance)
         self._start(instance, vm_ref)
 
+    def poll_rebooting_instances(self, timeout):
+        """Look for expirable rebooting instances.
+
+            - issue a "hard" reboot to any instance that has been stuck in a
+              reboot state for >= the given timeout
+        """
+        ctxt = nova_context.get_admin_context()
+        instances = db.instance_get_all_hung_in_rebooting(ctxt, timeout)
+
+        instances_info = dict(instance_count=len(instances),
+                              timeout=timeout)
+
+        if instances_info["instance_count"] > 0:
+            LOG.info(_("Found %(instance_count)d hung reboots "
+                       "older than %(timeout)d seconds") % instances_info)
+
+        for instance in instances:
+            LOG.info(_("Automatically hard rebooting %d"), instance.id)
+            self.compute_api.reboot(ctxt, instance.id, "HARD")
+
     def poll_rescued_instances(self, timeout):
         """Look for expirable rescued instances.
 
diff --git a/nova/virt/xenapi_conn.py b/nova/virt/xenapi_conn.py
index 700934420..2e4a53c5b 100644
--- a/nova/virt/xenapi_conn.py
+++ b/nova/virt/xenapi_conn.py
@@ -265,6 +265,10 @@ class XenAPIConnection(driver.ComputeDriver):
         """Power on the specified instance"""
         self._vmops.power_on(instance)
 
+    def poll_rebooting_instances(self, timeout):
+        """Poll for rebooting instances"""
+        self._vmops.poll_rebooting_instances(timeout)
+
     def poll_rescued_instances(self, timeout):
         """Poll for rescued instances"""
         self._vmops.poll_rescued_instances(timeout)
--
cgit