From c40fc8a4db3fe2d4d415f27d275b1d784a90cfe5 Mon Sep 17 00:00:00 2001
From: Andrew Laski
Date: Tue, 11 Dec 2012 13:48:11 -0500
Subject: Fix poll_rescued_instances periodic task

The poll_rescued_instances periodic task now checks how long an
instance has been in the RESCUED state before timing out the rescue.
It also performs the unrescue through the compute API, to make sure
the database is left in a consistent state.

The poll_rescued_instances method is no longer necessary in the virt
driver interface and has been removed. It has also been removed from
the individual virt drivers, where it was just a 'pass'.

bug 1088625
bug 1088627

Change-Id: I75f7dc188cc49e5f6e5c8a3cb256d1c42ff7d882
---
 nova/compute/manager.py            | 18 ++++++++++++++++-
 nova/tests/compute/test_compute.py | 34 ++++++++++++++++++++++++++++++++
 nova/tests/test_virt_drivers.py    |  4 ----
 nova/virt/driver.py                |  5 -----
 nova/virt/fake.py                  |  3 ---
 nova/virt/hyperv/driver.py         |  3 ---
 nova/virt/libvirt/driver.py        |  4 ----
 nova/virt/xenapi/driver.py         |  4 ----
 nova/virt/xenapi/vmops.py          | 40 --------------------------------------
 9 files changed, 51 insertions(+), 64 deletions(-)

diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 616083079..979f7c53a 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -1592,6 +1592,7 @@ class ComputeManager(manager.SchedulerDependentManager):
                               vm_state=vm_states.RESCUED,
                               task_state=None,
                               power_state=current_power_state,
+                              launched_at=timeutils.utcnow(),
                               expected_task_state=task_states.RESCUING)
 
     @exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@@ -2818,7 +2819,22 @@ class ComputeManager(manager.SchedulerDependentManager):
     @manager.periodic_task
     def _poll_rescued_instances(self, context):
         if CONF.rescue_timeout > 0:
-            self.driver.poll_rescued_instances(CONF.rescue_timeout)
+            instances = self.conductor_api.instance_get_all_by_host(context,
+                                                                    self.host)
+
+            rescued_instances = []
+            for instance in instances:
+                if instance['vm_state'] == vm_states.RESCUED:
+                    rescued_instances.append(instance)
+
+            to_unrescue = []
+            for instance in rescued_instances:
+                if timeutils.is_older_than(instance['launched_at'],
+                                           CONF.rescue_timeout):
+                    to_unrescue.append(instance)
+
+            for instance in to_unrescue:
+                self.compute_api.unrescue(context, instance)
 
     @manager.periodic_task
     def _poll_unconfirmed_resizes(self, context):
diff --git a/nova/tests/compute/test_compute.py b/nova/tests/compute/test_compute.py
index ccc9614c5..079a25d27 100644
--- a/nova/tests/compute/test_compute.py
+++ b/nova/tests/compute/test_compute.py
@@ -2788,6 +2788,40 @@ class ComputeTestCase(BaseTestCase):
         self.assertEqual(call_info['get_by_uuid'], 3)
         self.assertEqual(call_info['get_nw_info'], 4)
 
+    def test_poll_rescued_instances(self):
+        timed_out_time = timeutils.utcnow() - datetime.timedelta(minutes=5)
+        not_timed_out_time = timeutils.utcnow()
+
+        instances = [{'uuid': 'fake_uuid1', 'vm_state': vm_states.RESCUED,
+                      'launched_at': timed_out_time},
+                     {'uuid': 'fake_uuid2', 'vm_state': vm_states.ACTIVE,
+                      'launched_at': timed_out_time},
+                     {'uuid': 'fake_uuid3', 'vm_state': vm_states.ACTIVE,
+                      'launched_at': not_timed_out_time},
+                     {'uuid': 'fake_uuid4', 'vm_state': vm_states.RESCUED,
+                      'launched_at': timed_out_time},
+                     {'uuid': 'fake_uuid5', 'vm_state': vm_states.RESCUED,
+                      'launched_at': not_timed_out_time}]
+        unrescued_instances = {'fake_uuid1': False, 'fake_uuid4': False}
+
+        def fake_instance_get_all_by_host(context, host):
+            return instances
+
+        def fake_unrescue(self, context, instance):
+            unrescued_instances[instance['uuid']] = True
+
+        self.stubs.Set(self.compute.conductor_api, 'instance_get_all_by_host',
+                       fake_instance_get_all_by_host)
+        self.stubs.Set(compute_api.API, 'unrescue', fake_unrescue)
+
+        self.flags(rescue_timeout=60)
+        ctxt = context.get_admin_context()
+
+        self.compute._poll_rescued_instances(ctxt)
+
+        for instance in unrescued_instances.values():
+            self.assertTrue(instance)
+
     def test_poll_unconfirmed_resizes(self):
         instances = [{'uuid': 'fake_uuid1', 'vm_state': vm_states.RESIZED,
                       'task_state': None},
diff --git a/nova/tests/test_virt_drivers.py b/nova/tests/test_virt_drivers.py
index 834763540..563b3a44b 100644
--- a/nova/tests/test_virt_drivers.py
+++ b/nova/tests/test_virt_drivers.py
@@ -282,10 +282,6 @@ class _VirtDriverTestCase(_FakeDriverBackendTestCase):
         instances = [self._get_running_instance()]
         self.connection.poll_rebooting_instances(10, instances)
 
-    @catch_notimplementederror
-    def test_poll_rescued_instances(self):
-        self.connection.poll_rescued_instances(10)
-
     @catch_notimplementederror
     def test_migrate_disk_and_power_off(self):
         instance_ref, network_info = self._get_running_instance()
diff --git a/nova/virt/driver.py b/nova/virt/driver.py
index 005012c7f..991a0f6ce 100644
--- a/nova/virt/driver.py
+++ b/nova/virt/driver.py
@@ -635,11 +635,6 @@ class ComputeDriver(object):
         # TODO(Vek): Need to pass context in for access to auth_token
         raise NotImplementedError()
 
-    def poll_rescued_instances(self, timeout):
-        """Poll for rescued instances"""
-        # TODO(Vek): Need to pass context in for access to auth_token
-        raise NotImplementedError()
-
     def host_power_action(self, host, action):
         """Reboots, shuts down or powers up the host."""
         raise NotImplementedError()
diff --git a/nova/virt/fake.py b/nova/virt/fake.py
index 6f95256be..b2528b008 100644
--- a/nova/virt/fake.py
+++ b/nova/virt/fake.py
@@ -154,9 +154,6 @@ class FakeDriver(driver.ComputeDriver):
     def poll_rebooting_instances(self, timeout, instances):
         pass
 
-    def poll_rescued_instances(self, timeout):
-        pass
-
     def migrate_disk_and_power_off(self, context, instance, dest,
                                    instance_type, network_info,
                                    block_device_info=None):
diff --git a/nova/virt/hyperv/driver.py b/nova/virt/hyperv/driver.py
index d1b9904c4..a67274f5d 100644
--- a/nova/virt/hyperv/driver.py
+++ b/nova/virt/hyperv/driver.py
@@ -119,9 +119,6 @@ class HyperVDriver(driver.ComputeDriver):
     def get_volume_connector(self, instance):
         return self._volumeops.get_volume_connector(instance)
 
-    def poll_rescued_instances(self, timeout):
-        pass
-
     def get_available_resource(self, nodename):
         return self._hostops.get_available_resource()
 
diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py
index dbc2346be..263fd5ca4 100644
--- a/nova/virt/libvirt/driver.py
+++ b/nova/virt/libvirt/driver.py
@@ -1083,10 +1083,6 @@ class LibvirtDriver(driver.ComputeDriver):
     def poll_rebooting_instances(self, timeout, instances):
         pass
 
-    @exception.wrap_exception()
-    def poll_rescued_instances(self, timeout):
-        pass
-
     def _enable_hairpin(self, xml):
         interfaces = self.get_interfaces(xml)
         for interface in interfaces:
diff --git a/nova/virt/xenapi/driver.py b/nova/virt/xenapi/driver.py
index 79b408b25..1649ffb47 100644
--- a/nova/virt/xenapi/driver.py
+++ b/nova/virt/xenapi/driver.py
@@ -290,10 +290,6 @@ class XenAPIDriver(driver.ComputeDriver):
         """Poll for rebooting instances"""
         self._vmops.poll_rebooting_instances(timeout, instances)
 
-    def poll_rescued_instances(self, timeout):
-        """Poll for rescued instances"""
-        self._vmops.poll_rescued_instances(timeout)
-
     def reset_network(self, instance):
         """reset networking for specified instance"""
         self._vmops.reset_network(instance)
diff --git a/nova/virt/xenapi/vmops.py b/nova/virt/xenapi/vmops.py
index a68c9eb8d..588ae1604 100644
--- a/nova/virt/xenapi/vmops.py
+++ b/nova/virt/xenapi/vmops.py
@@ -147,7 +147,6 @@ class VMOps(object):
         self.compute_api = compute.API()
         self._session = session
         self._virtapi = virtapi
-        self.poll_rescue_last_ran = None
         self.firewall_driver = firewall.load_driver(
             DEFAULT_FIREWALL_DRIVER,
             self._virtapi,
@@ -1217,45 +1216,6 @@ class VMOps(object):
         LOG.info(_("Automatically hard rebooting"), instance=instance)
         self.compute_api.reboot(ctxt, instance, "HARD")
 
-    def poll_rescued_instances(self, timeout):
-        """Look for expirable rescued instances.
-
-            - forcibly exit rescue mode for any instances that have been
-              in rescue mode for >= the provided timeout
-
-        """
-        last_ran = self.poll_rescue_last_ran
-        if not last_ran:
-            # We need a base time to start tracking.
-            self.poll_rescue_last_ran = timeutils.utcnow()
-            return
-
-        if not timeutils.is_older_than(last_ran, timeout):
-            # Do not run. Let's bail.
-            return
-
-        # Update the time tracker and proceed.
-        self.poll_rescue_last_ran = timeutils.utcnow()
-
-        rescue_vms = []
-        for instance in self.list_instances():
-            if instance.endswith("-rescue"):
-                rescue_vms.append(dict(name=instance,
-                                       vm_ref=vm_utils.lookup(self._session,
-                                                              instance)))
-
-        for vm in rescue_vms:
-            rescue_vm_ref = vm["vm_ref"]
-
-            original_name = vm["name"].split("-rescue", 1)[0]
-            original_vm_ref = vm_utils.lookup(self._session, original_name)
-
-            self._destroy_rescue_instance(rescue_vm_ref, original_vm_ref)
-
-            self._release_bootlock(original_vm_ref)
-            self._session.call_xenapi("VM.start", original_vm_ref, False,
-                                      False)
-
     def get_info(self, instance, vm_ref=None):
         """Return data about VM instance."""
         vm_ref = vm_ref or self._get_vm_opaque_ref(instance)
-- 
cgit
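
For reference, the selection logic that the reworked _poll_rescued_instances
applies can be summarized in a few lines. The sketch below is illustrative
only and is not part of the patch: it mirrors the loop added to
nova/compute/manager.py above, but uses the standard datetime module instead
of nova's timeutils helper so it runs standalone, and the function name and
sample data are invented for the example.

    # Illustrative sketch; pick_instances_to_unrescue() and the sample data
    # are made up for this example and do not exist in nova.
    import datetime

    RESCUED = 'rescued'

    def pick_instances_to_unrescue(instances, rescue_timeout):
        """Return instances whose rescue is older than rescue_timeout seconds."""
        cutoff = datetime.datetime.utcnow() - datetime.timedelta(
            seconds=rescue_timeout)
        return [inst for inst in instances
                if inst['vm_state'] == RESCUED and inst['launched_at'] < cutoff]

    if __name__ == '__main__':
        old = datetime.datetime.utcnow() - datetime.timedelta(seconds=120)
        recent = datetime.datetime.utcnow()
        instances = [
            {'uuid': 'a', 'vm_state': RESCUED, 'launched_at': old},     # times out
            {'uuid': 'b', 'vm_state': 'active', 'launched_at': old},    # not rescued
            {'uuid': 'c', 'vm_state': RESCUED, 'launched_at': recent},  # too recent
        ]
        # With rescue_timeout=60 only instance 'a' is selected for unrescue.
        print([inst['uuid'] for inst in pick_instances_to_unrescue(instances, 60)])

In the patch itself the same decision is made with
timeutils.is_older_than(instance['launched_at'], CONF.rescue_timeout), and
launched_at is reset to the current time when the rescue completes (the first
manager.py hunk), so it marks the start of the rescue window.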