From cdf10381caad217ae9eb99c69a4226e40ba81db4 Mon Sep 17 00:00:00 2001
From: Hans Lindgren
Date: Sun, 2 Jun 2013 00:29:59 +0200
Subject: Fix a race where a soft deleted instance might be removed by mistake

When a soft deleted instance is restored, there is a tiny risk that it
might be deleted if the reclaim periodic task runs at the same time.
This happens because the restore operation sets deleted_at to None,
which makes the reclaim job think it is ok to delete the instance.

This can be resolved by restricting reclaims to only those instances
whose task_state is None.

Resolves bug 1186243.

Change-Id: Ia138e2d504b7482c46900583f019a32c70513245
---
 nova/compute/manager.py            |  1 +
 nova/db/sqlalchemy/api.py          |  2 +-
 nova/tests/compute/test_compute.py | 38 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index f9a1cc94d..be2f11ab6 100755
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -3989,6 +3989,7 @@ class ComputeManager(manager.SchedulerDependentManager):
             return
 
         filters = {'vm_state': vm_states.SOFT_DELETED,
+                   'task_state': None,
                    'host': self.host}
         instances = self.conductor_api.instance_get_all_by_filters(context,
                                                                    filters)
diff --git a/nova/db/sqlalchemy/api.py b/nova/db/sqlalchemy/api.py
index 18aa185ab..ffff8efed 100644
--- a/nova/db/sqlalchemy/api.py
+++ b/nova/db/sqlalchemy/api.py
@@ -1728,7 +1728,7 @@ def instance_get_all_by_filters(context, filters, sort_key, sort_dir,
     # For other filters that don't match this, we will do regexp matching
     exact_match_filter_names = ['project_id', 'user_id', 'image_ref',
                                 'vm_state', 'instance_type_id', 'uuid',
-                                'metadata', 'host']
+                                'metadata', 'host', 'task_state']
 
     # Filter the query
     query_prefix = exact_filter(query_prefix, models.Instance,
diff --git a/nova/tests/compute/test_compute.py b/nova/tests/compute/test_compute.py
index 491ecf544..9e13b2ffe 100644
--- a/nova/tests/compute/test_compute.py
+++ b/nova/tests/compute/test_compute.py
@@ -5004,6 +5004,44 @@ class ComputeTestCase(BaseTestCase):
         updated_ats = (updated_at_1, updated_at_2, updated_at_3)
         self.assertEqual(len(updated_ats), len(set(updated_ats)))
 
+    def test_reclaim_queued_deletes(self):
+        self.flags(reclaim_instance_interval=3600)
+        ctxt = context.get_admin_context()
+
+        # Active
+        self._create_fake_instance(params={'host': CONF.host})
+
+        # Deleted not old enough
+        self._create_fake_instance(params={'host': CONF.host,
+                                           'vm_state': vm_states.SOFT_DELETED,
+                                           'deleted_at': timeutils.utcnow()})
+
+        # Deleted old enough (only this one should be reclaimed)
+        deleted_at = (timeutils.utcnow() -
+                      datetime.timedelta(hours=1, minutes=5))
+        instance = self._create_fake_instance(
+                params={'host': CONF.host,
+                        'vm_state': vm_states.SOFT_DELETED,
+                        'deleted_at': deleted_at})
+
+        # Restoring
+        # NOTE(hanlind): This specifically tests for a race condition
+        # where restoring a previously soft deleted instance sets
+        # deleted_at back to None, causing reclaim to think it can be
+        # deleted, see LP #1186243.
+        self._create_fake_instance(
+                params={'host': CONF.host,
+                        'vm_state': vm_states.SOFT_DELETED,
+                        'task_state': task_states.RESTORING})
+
+        self.mox.StubOutWithMock(self.compute, '_delete_instance')
+        instance_ref = get_primitive_instance_by_uuid(ctxt, instance['uuid'])
+        self.compute._delete_instance(ctxt, instance_ref, [])
+
+        self.mox.ReplayAll()
+
+        self.compute._reclaim_queued_deletes(ctxt)
+
 
 class ComputeAPITestCase(BaseTestCase):
--
cgit
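
To make the race concrete, below is a minimal, self-contained Python sketch
of the reclaim logic before and after this patch. It is not nova code:
FakeInstance, reclaim_queued_deletes(), and the state constants are
hypothetical stand-ins. The one behaviour taken from the patch is the
reported failure mode that a deleted_at of None makes the reclaim job
consider the instance deletable, which is why a concurrent restore could
get its instance destroyed.

import datetime

SOFT_DELETED = 'soft-delete'   # stands in for nova's vm_states.SOFT_DELETED
RESTORING = 'restoring'        # stands in for nova's task_states.RESTORING
RECLAIM_INTERVAL = datetime.timedelta(seconds=3600)


class FakeInstance(object):
    def __init__(self, vm_state, task_state, deleted_at):
        self.vm_state = vm_state
        self.task_state = task_state
        self.deleted_at = deleted_at
        self.reclaimed = False


def reclaim_queued_deletes(instances, now, filter_task_state):
    # filter_task_state=False models the pre-patch filter
    # ({'vm_state': SOFT_DELETED, 'host': ...}); True adds the
    # 'task_state': None restriction this patch introduces.
    for inst in instances:
        if inst.vm_state != SOFT_DELETED:
            continue
        if filter_task_state and inst.task_state is not None:
            continue  # an operation such as restore is in flight; skip it
        # Per the commit message, a deleted_at of None is treated as ok to
        # delete -- this is what made the race destructive once restore
        # had reset deleted_at.
        old_enough = (inst.deleted_at is None or
                      now - inst.deleted_at > RECLAIM_INTERVAL)
        if old_enough:
            inst.reclaimed = True


now = datetime.datetime.utcnow()

# An instance caught mid-restore: still SOFT_DELETED, task_state set by
# the restore operation, deleted_at already reset to None.
restoring = FakeInstance(SOFT_DELETED, RESTORING, deleted_at=None)

reclaim_queued_deletes([restoring], now, filter_task_state=False)
print(restoring.reclaimed)   # True  -- pre-patch: the restored instance is lost

restoring.reclaimed = False
reclaim_queued_deletes([restoring], now, filter_task_state=True)
print(restoring.reclaimed)   # False -- patched: the restore survives

Run as-is, the first call models the old behaviour and reclaims the
mid-restore instance; the second adds the task_state restriction and leaves
it alone, mirroring the one-line change to nova/compute/manager.py above.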