From c40fc8a4db3fe2d4d415f27d275b1d784a90cfe5 Mon Sep 17 00:00:00 2001
From: Andrew Laski
Date: Tue, 11 Dec 2012 13:48:11 -0500
Subject: Fix poll_rescued_instances periodic task

The poll_rescued_instances periodic task now checks how long an
instance has been in the RESCUED state before timing out the rescue.
It also performs the unrescue through the compute API, to make sure
the database is left in a consistent state.

The poll_rescued_instances method is no longer necessary in the virt
driver interface and has been removed. It has also been removed from
the individual virt drivers, where it was just a 'pass'.

bug 1088625
bug 1088627

Change-Id: I75f7dc188cc49e5f6e5c8a3cb256d1c42ff7d882
---
 nova/compute/manager.py            | 18 ++++++++++++++++-
 nova/tests/compute/test_compute.py | 34 ++++++++++++++++++++++++++++++++
 nova/tests/test_virt_drivers.py    |  4 ----
 nova/virt/driver.py                |  5 -----
 nova/virt/fake.py                  |  3 ---
 nova/virt/hyperv/driver.py         |  3 ---
 nova/virt/libvirt/driver.py        |  4 ----
 nova/virt/xenapi/driver.py         |  4 ----
 nova/virt/xenapi/vmops.py          | 40 --------------------------------------
 9 files changed, 51 insertions(+), 64 deletions(-)

diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 616083079..979f7c53a 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -1592,6 +1592,7 @@ class ComputeManager(manager.SchedulerDependentManager):
                               vm_state=vm_states.RESCUED,
                               task_state=None,
                               power_state=current_power_state,
+                              launched_at=timeutils.utcnow(),
                               expected_task_state=task_states.RESCUING)
 
     @exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@@ -2818,7 +2819,22 @@ class ComputeManager(manager.SchedulerDependentManager):
     @manager.periodic_task
     def _poll_rescued_instances(self, context):
         if CONF.rescue_timeout > 0:
-            self.driver.poll_rescued_instances(CONF.rescue_timeout)
+            instances = self.conductor_api.instance_get_all_by_host(context,
+                                                                    self.host)
+
+            rescued_instances = []
+            for instance in instances:
+                if instance['vm_state'] == vm_states.RESCUED:
+                    rescued_instances.append(instance)
+
+            to_unrescue = []
+            for instance in rescued_instances:
+                if timeutils.is_older_than(instance['launched_at'],
+                                           CONF.rescue_timeout):
+                    to_unrescue.append(instance)
+
+            for instance in to_unrescue:
+                self.compute_api.unrescue(context, instance)
 
     @manager.periodic_task
     def _poll_unconfirmed_resizes(self, context):
diff --git a/nova/tests/compute/test_compute.py b/nova/tests/compute/test_compute.py
index ccc9614c5..079a25d27 100644
--- a/nova/tests/compute/test_compute.py
+++ b/nova/tests/compute/test_compute.py
@@ -2788,6 +2788,40 @@ class ComputeTestCase(BaseTestCase):
         self.assertEqual(call_info['get_by_uuid'], 3)
         self.assertEqual(call_info['get_nw_info'], 4)
 
+    def test_poll_rescued_instances(self):
+        timed_out_time = timeutils.utcnow() - datetime.timedelta(minutes=5)
+        not_timed_out_time = timeutils.utcnow()
+
+        instances = [{'uuid': 'fake_uuid1', 'vm_state': vm_states.RESCUED,
+                      'launched_at': timed_out_time},
+                     {'uuid': 'fake_uuid2', 'vm_state': vm_states.ACTIVE,
+                      'launched_at': timed_out_time},
+                     {'uuid': 'fake_uuid3', 'vm_state': vm_states.ACTIVE,
+                      'launched_at': not_timed_out_time},
+                     {'uuid': 'fake_uuid4', 'vm_state': vm_states.RESCUED,
+                      'launched_at': timed_out_time},
+                     {'uuid': 'fake_uuid5', 'vm_state': vm_states.RESCUED,
+                      'launched_at': not_timed_out_time}]
+        unrescued_instances = {'fake_uuid1': False, 'fake_uuid4': False}
+
+        def fake_instance_get_all_by_host(context, host):
+            return instances
+
+        def fake_unrescue(self, context, instance):
+            unrescued_instances[instance['uuid']] = True
+
+        self.stubs.Set(self.compute.conductor_api, 'instance_get_all_by_host',
+                       fake_instance_get_all_by_host)
+        self.stubs.Set(compute_api.API, 'unrescue', fake_unrescue)
+
+        self.flags(rescue_timeout=60)
+        ctxt = context.get_admin_context()
+
+        self.compute._poll_rescued_instances(ctxt)
+
+        for instance in unrescued_instances.values():
+            self.assertTrue(instance)
+
     def test_poll_unconfirmed_resizes(self):
         instances = [{'uuid': 'fake_uuid1', 'vm_state': vm_states.RESIZED,
                       'task_state': None},
diff --git a/nova/tests/test_virt_drivers.py b/nova/tests/test_virt_drivers.py
index 834763540..563b3a44b 100644
--- a/nova/tests/test_virt_drivers.py
+++ b/nova/tests/test_virt_drivers.py
@@ -282,10 +282,6 @@ class _VirtDriverTestCase(_FakeDriverBackendTestCase):
         instances = [self._get_running_instance()]
         self.connection.poll_rebooting_instances(10, instances)
 
-    @catch_notimplementederror
-    def test_poll_rescued_instances(self):
-        self.connection.poll_rescued_instances(10)
-
     @catch_notimplementederror
     def test_migrate_disk_and_power_off(self):
         instance_ref, network_info = self._get_running_instance()
diff --git a/nova/virt/driver.py b/nova/virt/driver.py
index 005012c7f..991a0f6ce 100644
--- a/nova/virt/driver.py
+++ b/nova/virt/driver.py
@@ -635,11 +635,6 @@ class ComputeDriver(object):
         # TODO(Vek): Need to pass context in for access to auth_token
         raise NotImplementedError()
 
-    def poll_rescued_instances(self, timeout):
-        """Poll for rescued instances"""
-        # TODO(Vek): Need to pass context in for access to auth_token
-        raise NotImplementedError()
-
     def host_power_action(self, host, action):
         """Reboots, shuts down or powers up the host."""
         raise NotImplementedError()
diff --git a/nova/virt/fake.py b/nova/virt/fake.py
index 6f95256be..b2528b008 100644
--- a/nova/virt/fake.py
+++ b/nova/virt/fake.py
@@ -154,9 +154,6 @@ class FakeDriver(driver.ComputeDriver):
     def poll_rebooting_instances(self, timeout, instances):
         pass
 
-    def poll_rescued_instances(self, timeout):
-        pass
-
     def migrate_disk_and_power_off(self, context, instance, dest,
                                    instance_type, network_info,
                                    block_device_info=None):
diff --git a/nova/virt/hyperv/driver.py b/nova/virt/hyperv/driver.py
index d1b9904c4..a67274f5d 100644
--- a/nova/virt/hyperv/driver.py
+++ b/nova/virt/hyperv/driver.py
@@ -119,9 +119,6 @@ class HyperVDriver(driver.ComputeDriver):
     def get_volume_connector(self, instance):
         return self._volumeops.get_volume_connector(instance)
 
-    def poll_rescued_instances(self, timeout):
-        pass
-
     def get_available_resource(self, nodename):
         return self._hostops.get_available_resource()
 
diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py
index dbc2346be..263fd5ca4 100644
--- a/nova/virt/libvirt/driver.py
+++ b/nova/virt/libvirt/driver.py
@@ -1083,10 +1083,6 @@ class LibvirtDriver(driver.ComputeDriver):
     def poll_rebooting_instances(self, timeout, instances):
         pass
 
-    @exception.wrap_exception()
-    def poll_rescued_instances(self, timeout):
-        pass
-
     def _enable_hairpin(self, xml):
         interfaces = self.get_interfaces(xml)
         for interface in interfaces:
diff --git a/nova/virt/xenapi/driver.py b/nova/virt/xenapi/driver.py
index 79b408b25..1649ffb47 100644
--- a/nova/virt/xenapi/driver.py
+++ b/nova/virt/xenapi/driver.py
@@ -290,10 +290,6 @@ class XenAPIDriver(driver.ComputeDriver):
         """Poll for rebooting instances"""
         self._vmops.poll_rebooting_instances(timeout, instances)
 
-    def poll_rescued_instances(self, timeout):
-        """Poll for rescued instances"""
-        self._vmops.poll_rescued_instances(timeout)
-
     def reset_network(self, instance):
         """reset networking for specified instance"""
         self._vmops.reset_network(instance)
diff --git a/nova/virt/xenapi/vmops.py b/nova/virt/xenapi/vmops.py
index a68c9eb8d..588ae1604 100644
--- a/nova/virt/xenapi/vmops.py
+++ b/nova/virt/xenapi/vmops.py
@@ -147,7 +147,6 @@ class VMOps(object):
         self.compute_api = compute.API()
         self._session = session
         self._virtapi = virtapi
-        self.poll_rescue_last_ran = None
         self.firewall_driver = firewall.load_driver(
             DEFAULT_FIREWALL_DRIVER,
             self._virtapi,
@@ -1217,45 +1216,6 @@ class VMOps(object):
         LOG.info(_("Automatically hard rebooting"), instance=instance)
         self.compute_api.reboot(ctxt, instance, "HARD")
 
-    def poll_rescued_instances(self, timeout):
-        """Look for expirable rescued instances.
-
-            - forcibly exit rescue mode for any instances that have been
-              in rescue mode for >= the provided timeout
-
-        """
-        last_ran = self.poll_rescue_last_ran
-        if not last_ran:
-            # We need a base time to start tracking.
-            self.poll_rescue_last_ran = timeutils.utcnow()
-            return
-
-        if not timeutils.is_older_than(last_ran, timeout):
-            # Do not run. Let's bail.
-            return
-
-        # Update the time tracker and proceed.
-        self.poll_rescue_last_ran = timeutils.utcnow()
-
-        rescue_vms = []
-        for instance in self.list_instances():
-            if instance.endswith("-rescue"):
-                rescue_vms.append(dict(name=instance,
-                                       vm_ref=vm_utils.lookup(self._session,
-                                                              instance)))
-
-        for vm in rescue_vms:
-            rescue_vm_ref = vm["vm_ref"]
-
-            original_name = vm["name"].split("-rescue", 1)[0]
-            original_vm_ref = vm_utils.lookup(self._session, original_name)
-
-            self._destroy_rescue_instance(rescue_vm_ref, original_vm_ref)
-
-            self._release_bootlock(original_vm_ref)
-            self._session.call_xenapi("VM.start", original_vm_ref, False,
-                                      False)
-
     def get_info(self, instance, vm_ref=None):
         """Return data about VM instance."""
         vm_ref = vm_ref or self._get_vm_opaque_ref(instance)
-- 
cgit
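
For reference, the selection logic that the reworked _poll_rescued_instances
applies can be summarized in a few lines. The sketch below is illustrative
only and is not part of the patch: it mirrors the loop added to
nova/compute/manager.py above, but uses the standard datetime module instead
of nova's timeutils helper so it runs standalone, and the function name and
sample data are invented for the example.

    # Illustrative sketch; pick_instances_to_unrescue() and the sample data
    # are made up for this example and do not exist in nova.
    import datetime

    RESCUED = 'rescued'

    def pick_instances_to_unrescue(instances, rescue_timeout):
        """Return instances whose rescue is older than rescue_timeout seconds."""
        cutoff = datetime.datetime.utcnow() - datetime.timedelta(
            seconds=rescue_timeout)
        return [inst for inst in instances
                if inst['vm_state'] == RESCUED and inst['launched_at'] < cutoff]

    if __name__ == '__main__':
        old = datetime.datetime.utcnow() - datetime.timedelta(seconds=120)
        recent = datetime.datetime.utcnow()
        instances = [
            {'uuid': 'a', 'vm_state': RESCUED, 'launched_at': old},     # times out
            {'uuid': 'b', 'vm_state': 'active', 'launched_at': old},    # not rescued
            {'uuid': 'c', 'vm_state': RESCUED, 'launched_at': recent},  # too recent
        ]
        # With rescue_timeout=60 only instance 'a' is selected for unrescue.
        print([inst['uuid'] for inst in pick_instances_to_unrescue(instances, 60)])

In the patch itself the same decision is made with
timeutils.is_older_than(instance['launched_at'], CONF.rescue_timeout), and
launched_at is reset to the current time when the rescue completes (the first
manager.py hunk), so it marks the start of the rescue window.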