-rw-r--r--  nova/compute/manager.py   | 13
-rw-r--r--  nova/utils.py             |  8
-rw-r--r--  nova/virt/hyperv.py       |  3
-rw-r--r--  nova/virt/libvirt_conn.py |  4
-rw-r--r--  nova/virt/xenapi/vmops.py | 91
-rw-r--r--  nova/virt/xenapi_conn.py  |  4
6 files changed, 99 insertions(+), 24 deletions(-)
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index ac63f68ea..432270467 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -65,8 +65,11 @@ flags.DEFINE_string('console_host', socket.gethostname(),
                     'Console proxy host to use to connect to instances on'
                     'this host.')
 flags.DEFINE_integer('live_migration_retry_count', 30,
-                     ("Retry count needed in live_migration."
-                      " sleep 1 sec for each count"))
+                     "Retry count needed in live_migration."
+                     " sleep 1 sec for each count")
+flags.DEFINE_integer("rescue_timeout", 0,
+                     "Automatically unrescue an instance after N seconds."
+                     " Set to 0 to disable.")
 
 LOG = logging.getLogger('nova.compute.manager')
 
@@ -132,6 +135,12 @@ class ComputeManager(manager.Manager):
         """
         self.driver.init_host(host=self.host)
 
+    def periodic_tasks(self, context=None):
+        """Tasks to be run at a periodic interval."""
+        super(ComputeManager, self).periodic_tasks(context)
+        if FLAGS.rescue_timeout > 0:
+            self.driver.poll_rescued_instances(FLAGS.rescue_timeout)
+
     def _update_state(self, context, instance_id):
         """Update the state of an instance from the driver info."""
         # FIXME(ja): include other fields from state?
diff --git a/nova/utils.py b/nova/utils.py
index 8b9ce4734..03a6e8095 100644
--- a/nova/utils.py
+++ b/nova/utils.py
@@ -335,6 +335,14 @@ def utcnow():
 utcnow.override_time = None
 
 
+def is_older_than(before, seconds):
+    """Return True if before is older than 'seconds'"""
+    if utcnow() - before > datetime.timedelta(seconds=seconds):
+        return True
+    else:
+        return False
+
+
 def utcnow_ts():
     """Timestamp version of our utcnow function."""
     return time.mktime(utcnow().timetuple())
diff --git a/nova/virt/hyperv.py b/nova/virt/hyperv.py
index 29d18dac5..75fed6d4f 100644
--- a/nova/virt/hyperv.py
+++ b/nova/virt/hyperv.py
@@ -467,3 +467,6 @@ class HyperVConnection(object):
         if vm is None:
             raise exception.NotFound('Cannot detach volume from missing %s '
                                      % instance_name)
+
+    def poll_rescued_instances(self, timeout):
+        pass
diff --git a/nova/virt/libvirt_conn.py b/nova/virt/libvirt_conn.py
index 214670180..67094320e 100644
--- a/nova/virt/libvirt_conn.py
+++ b/nova/virt/libvirt_conn.py
@@ -417,6 +417,10 @@ class LibvirtConnection(object):
         self.reboot(instance)
 
     @exception.wrap_exception
+    def poll_rescued_instances(self, timeout):
+        pass
+
+    @exception.wrap_exception
     def spawn(self, instance):
         xml = self.to_xml(instance)
         db.instance_set_state(context.get_admin_context(),
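Taken together, the files above add the knob (rescue_timeout), the scheduler hook (ComputeManager.periodic_tasks), the age test (utils.is_older_than), and no-op stubs for the drivers that do not yet implement the sweep. The throttling pattern the XenAPI driver builds from these pieces (below) is easy to lose in the diff, so here is a minimal, self-contained sketch of it; RescuePoller and maybe_sweep are illustrative names, not Nova code:

import datetime


def utcnow():
    # Stand-in for nova.utils.utcnow(); the real one supports time overrides.
    return datetime.datetime.utcnow()


def is_older_than(before, seconds):
    # Mirrors the new nova.utils.is_older_than() helper.
    return utcnow() - before > datetime.timedelta(seconds=seconds)


class RescuePoller(object):
    """Illustrative only: run an expensive sweep at most once per 'timeout'."""

    def __init__(self):
        self.last_ran = None

    def maybe_sweep(self, timeout):
        if self.last_ran is None:
            # First call only establishes a baseline timestamp.
            self.last_ran = utcnow()
            return False
        if not is_older_than(self.last_ran, timeout):
            # Not enough time has elapsed; skip this periodic tick.
            return False
        self.last_ran = utcnow()
        return True  # Caller performs the actual unrescue sweep.

Note that the single flag doubles as both the minimum rescue age and the sweep interval, which keeps the bookkeeping to one timestamp per driver.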
diff --git a/nova/virt/xenapi/vmops.py b/nova/virt/xenapi/vmops.py
index 872e67f01..cfc3a1c22 100644
--- a/nova/virt/xenapi/vmops.py
+++ b/nova/virt/xenapi/vmops.py
@@ -51,6 +51,7 @@ class VMOps(object):
     def __init__(self, session):
         self.XenAPI = session.get_imported_xenapi()
         self._session = session
+        self.poll_rescue_last_ran = None
 
         VMHelper.XenAPI = self.XenAPI
 
@@ -488,6 +489,10 @@ class VMOps(object):
         except self.XenAPI.Failure, exc:
             LOG.exception(exc)
 
+    def _shutdown_rescue(self, rescue_vm_ref):
+        """Shutdown a rescue instance"""
+        self._session.call_xenapi("Async.VM.hard_shutdown", rescue_vm_ref)
+
     def _destroy_vdis(self, instance, vm_ref):
         """Destroys all VDIs associated with a VM"""
         instance_id = instance.id
@@ -505,6 +510,24 @@ class VMOps(object):
         except self.XenAPI.Failure, exc:
             LOG.exception(exc)
 
+    def _destroy_rescue_vdis(self, rescue_vm_ref):
+        """Destroys all VDIs associated with a rescued VM"""
+        vdi_refs = VMHelper.lookup_vm_vdis(self._session, rescue_vm_ref)
+        for vdi_ref in vdi_refs:
+            try:
+                self._session.call_xenapi("Async.VDI.destroy", vdi_ref)
+            except self.XenAPI.Failure:
+                continue
+
+    def _destroy_rescue_vbds(self, rescue_vm_ref):
+        """Destroys all VBDs tied to a rescue VM"""
+        vbd_refs = self._session.get_xenapi().VM.get_VBDs(rescue_vm_ref)
+        for vbd_ref in vbd_refs:
+            vbd_rec = self._session.get_xenapi().VBD.get_record(vbd_ref)
+            if vbd_rec["userdevice"] == "1":  # primary VBD is always 1
+                VMHelper.unplug_vbd(self._session, vbd_ref)
+                VMHelper.destroy_vbd(self._session, vbd_ref)
+
     def _destroy_kernel_ramdisk(self, instance, vm_ref):
         """
         Three situations can occur:
@@ -555,6 +578,14 @@ class VMOps(object):
 
         LOG.debug(_("Instance %(instance_id)s VM destroyed") % locals())
 
+    def _destroy_rescue_instance(self, rescue_vm_ref):
+        """Destroy a rescue instance"""
+        self._destroy_rescue_vbds(rescue_vm_ref)
+        self._shutdown_rescue(rescue_vm_ref)
+        self._destroy_rescue_vdis(rescue_vm_ref)
+
+        self._session.call_xenapi("Async.VM.destroy", rescue_vm_ref)
+
     def destroy(self, instance):
         """
         Destroy VM instance
@@ -658,40 +689,56 @@
 
         """
         rescue_vm_ref = VMHelper.lookup(self._session,
-                                    instance.name + "-rescue")
+                                        instance.name + "-rescue")
 
         if not rescue_vm_ref:
             raise exception.NotFound(_(
                 "Instance is not in Rescue Mode: %s" % instance.name))
 
         original_vm_ref = self._get_vm_opaque_ref(instance)
-        vbd_refs = self._session.get_xenapi().VM.get_VBDs(rescue_vm_ref)
-
         instance._rescue = False
 
-        for vbd_ref in vbd_refs:
-            _vbd_ref = self._session.get_xenapi().VBD.get_record(vbd_ref)
-            if _vbd_ref["userdevice"] == "1":
-                VMHelper.unplug_vbd(self._session, vbd_ref)
-                VMHelper.destroy_vbd(self._session, vbd_ref)
+        self._destroy_rescue_instance(rescue_vm_ref)
+        self._release_bootlock(original_vm_ref)
+        self._start(instance, original_vm_ref)
 
-        task1 = self._session.call_xenapi("Async.VM.hard_shutdown",
-                                          rescue_vm_ref)
-        self._session.wait_for_task(task1, instance.id)
+    def poll_rescued_instances(self, timeout):
+        """Look for expirable rescued instances
+            - forcibly exit rescue mode for any instances that have been
+              in rescue mode for >= the provided timeout
+        """
+        last_ran = self.poll_rescue_last_ran
+        if last_ran:
+            if not utils.is_older_than(last_ran, timeout):
+                # Do not run. Let's bail.
+                return
+            else:
+                # Update the time tracker and proceed.
+                self.poll_rescue_last_ran = utils.utcnow()
+        else:
+            # We need a base time to start tracking.
+            self.poll_rescue_last_ran = utils.utcnow()
+            return
 
-        vdi_refs = VMHelper.lookup_vm_vdis(self._session, rescue_vm_ref)
-        for vdi_ref in vdi_refs:
-            try:
-                task = self._session.call_xenapi('Async.VDI.destroy', vdi_ref)
-                self._session.wait_for_task(task, instance.id)
-            except self.XenAPI.Failure:
-                continue
+        rescue_vms = []
+        for instance in self.list_instances():
+            if instance.endswith("-rescue"):
+                rescue_vms.append(dict(name=instance,
+                                       vm_ref=VMHelper.lookup(self._session,
+                                                              instance)))
 
-        task2 = self._session.call_xenapi('Async.VM.destroy', rescue_vm_ref)
-        self._session.wait_for_task(task2, instance.id)
+        for vm in rescue_vms:
+            rescue_name = vm["name"]
+            rescue_vm_ref = vm["vm_ref"]
 
-        self._release_bootlock(original_vm_ref)
-        self._start(instance, original_vm_ref)
+            self._destroy_rescue_instance(rescue_vm_ref)
+
+            original_name = vm["name"].split("-rescue", 1)[0]
+            original_vm_ref = VMHelper.lookup(self._session, original_name)
+
+            self._release_bootlock(original_vm_ref)
+            self._session.call_xenapi("VM.start", original_vm_ref, False,
+                                      False)
 
     def get_info(self, instance):
         """Return data about VM instance"""
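Two details of the VMOps changes are worth noting. First, teardown follows a fixed order: unplug and destroy the rescue VM's VBDs, hard-shutdown the VM, destroy its VDIs, then destroy the VM itself. Second, the sweep identifies rescue VMs purely by the "-rescue" name suffix and unrescues every match; it is the poll_rescue_last_ran throttle, not a per-instance age check, that spaces the sweeps out. A small sketch of the name round-trip the sweep depends on, with illustrative helper names:

def rescue_name(instance_name):
    # Convention used by VMHelper.lookup(..., instance.name + "-rescue").
    return instance_name + "-rescue"


def original_name(rescue_vm_name):
    # Matches the vm["name"].split("-rescue", 1)[0] logic above.
    return rescue_vm_name.split("-rescue", 1)[0]


assert rescue_name("instance-00000001") == "instance-00000001-rescue"
assert original_name("instance-00000001-rescue") == "instance-00000001"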
diff --git a/nova/virt/xenapi_conn.py b/nova/virt/xenapi_conn.py
index da2fb51f1..2884687fb 100644
--- a/nova/virt/xenapi_conn.py
+++ b/nova/virt/xenapi_conn.py
@@ -223,6 +223,10 @@ class XenAPIConnection(object):
         """Unrescue the specified instance"""
         self._vmops.unrescue(instance, callback)
 
+    def poll_rescued_instances(self, timeout):
+        """Poll for rescued instances"""
+        self._vmops.poll_rescued_instances(timeout)
+
     def reset_network(self, instance):
         """reset networking for specified instance"""
         self._vmops.reset_network(instance)
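With every driver exposing poll_rescued_instances(), enabling the feature is a single flag on the compute nodes. A hypothetical flagfile entry (the deployment mechanism itself is outside this diff):

--rescue_timeout=3600

Each pass of ComputeManager.periodic_tasks() then calls driver.poll_rescued_instances(3600): the libvirt and Hyper-V stubs return immediately, while the XenAPI implementation tears down any expired rescue VMs and restarts the original instances.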