From 7b75fe7f571dd95287307f9d1138fb476a6bf721 Mon Sep 17 00:00:00 2001
From: Philip Knouff
Date: Thu, 3 May 2012 14:55:03 -0400
Subject: Optional timeout for servers stuck in build

Fixes bug 994786

Change-Id: Iae86c002073f45b48acde8eae07f9b0f62488f90
---
 nova/compute/manager.py    |  26 ++++++++++++
 nova/tests/test_compute.py | 104 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 130 insertions(+)

diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index e35ede5e4..3d42a36f0 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -96,6 +96,11 @@ compute_opts = [
                help="Automatically hard reboot an instance if it has been "
                     "stuck in a rebooting state longer than N seconds. "
                     "Set to 0 to disable."),
+    cfg.IntOpt("instance_build_timeout",
+               default=0,
+               help="Amount of time in seconds an instance can be in BUILD "
+                    "before going into ERROR status. "
+                    "Set to 0 to disable."),
     cfg.IntOpt("rescue_timeout",
                default=0,
                help="Automatically unrescue an instance after N seconds. "
@@ -450,6 +455,27 @@ class ComputeManager(manager.SchedulerDependentManager):
             with excutils.save_and_reraise_exception():
                 self._set_instance_error_state(context, instance_uuid)
 
+    @manager.periodic_task
+    def _check_instance_build_time(self, context):
+        """Ensure that instances are not stuck in build.
+
+        Instances still in the BUILDING vm_state whose 'created_at' is
+        older than FLAGS.instance_build_timeout seconds are put into the
+        error state.  A timeout of 0 disables the check.
+        """
+        if FLAGS.instance_build_timeout == 0:
+            return
+
+        filters = {'vm_state': vm_states.BUILDING}
+        building_insts = self.db.instance_get_all_by_filters(context, filters)
+
+        for instance in building_insts:
+            if utils.is_older_than(instance['created_at'],
+                                   FLAGS.instance_build_timeout):
+                self._set_instance_error_state(context, instance['uuid'])
+                LOG.warn(_("Instance build timed out. Set to error state."),
+                         instance=instance)
+
     def _update_access_ip(self, context, instance, nw_info):
         """Update the access ip values for a given instance.
 
diff --git a/nova/tests/test_compute.py b/nova/tests/test_compute.py
index f806e7516..fd9c6a8c0 100644
--- a/nova/tests/test_compute.py
+++ b/nova/tests/test_compute.py
@@ -1873,6 +1873,110 @@ class ComputeTestCase(BaseTestCase):
         for uuid, status in expected_migration_status.iteritems():
             self.assertEqual(status, fetch_instance_migration_status(uuid))
 
+    def test_instance_build_timeout_disabled(self):
+        self.flags(instance_build_timeout=0)  # 0 disables the check
+        ctxt = context.get_admin_context()
+        called = {'get_all': False, 'set_error_state': 0}
+        created_at = utils.utcnow() + datetime.timedelta(seconds=-60)
+
+        def fake_instance_get_all_by_filters(*args, **kwargs):
+            called['get_all'] = True
+            return instances[:]  # copy of the list built below
+
+        self.stubs.Set(db, 'instance_get_all_by_filters',
+                       fake_instance_get_all_by_filters)
+
+        def fake_set_instance_error_state(_ctxt, instance_uuid, **kwargs):
+            called['set_error_state'] += 1
+
+        self.stubs.Set(self.compute, '_set_instance_error_state',
+                       fake_set_instance_error_state)
+
+        instance_map = {}
+        instances = []
+        for x in xrange(5):
+            uuid = 'fake-uuid-%s' % x
+            instance_map[uuid] = {'uuid': uuid, 'host': FLAGS.host,
+                                  'vm_state': vm_states.BUILDING,
+                                  'created_at': created_at}
+            instances.append(instance_map[uuid])
+
+        self.compute._check_instance_build_time(ctxt)
+        self.assertFalse(called['get_all'])  # no db query was made
+        self.assertEqual(called['set_error_state'], 0)
+
+    def test_instance_build_timeout(self):
+        self.flags(instance_build_timeout=30)  # seconds; fakes are 60s old
+        ctxt = context.get_admin_context()
+        called = {'get_all': False, 'set_error_state': 0}
+        created_at = utils.utcnow() + datetime.timedelta(seconds=-60)
+
+        def fake_instance_get_all_by_filters(*args, **kwargs):
+            called['get_all'] = True
+            return instances[:]  # copy of the list built below
+
+        self.stubs.Set(db, 'instance_get_all_by_filters',
+                       fake_instance_get_all_by_filters)
+
+        def fake_set_instance_error_state(_ctxt, instance_uuid, **kwargs):
+            called['set_error_state'] += 1
+
+        self.stubs.Set(self.compute, '_set_instance_error_state',
+                       fake_set_instance_error_state)
+
+        instance_map = {}
+        instances = []
+        for x in xrange(5):
+            uuid = 'fake-uuid-%s' % x
+            instance_map[uuid] = {'uuid': uuid, 'host': FLAGS.host,
+                                  'vm_state': vm_states.BUILDING,
+                                  'created_at': created_at}
+            instances.append(instance_map[uuid])
+
+        self.compute._check_instance_build_time(ctxt)
+        self.assertTrue(called['get_all'])
+        self.assertEqual(called['set_error_state'], 5)  # all five timed out
+
+    def test_instance_build_timeout_mixed_instances(self):
+        self.flags(instance_build_timeout=30)  # seconds
+        ctxt = context.get_admin_context()
+        called = {'get_all': False, 'set_error_state': 0}
+        created_at = utils.utcnow() + datetime.timedelta(seconds=-60)
+
+        def fake_instance_get_all_by_filters(*args, **kwargs):
+            called['get_all'] = True
+            return instances[:]  # copy of the list built below
+
+        self.stubs.Set(db, 'instance_get_all_by_filters',
+                       fake_instance_get_all_by_filters)
+
+        def fake_set_instance_error_state(_ctxt, instance_uuid, **kwargs):
+            called['set_error_state'] += 1
+
+        self.stubs.Set(self.compute, '_set_instance_error_state',
+                       fake_set_instance_error_state)
+
+        instance_map = {}
+        instances = []
+        # four instances created 60s ago: past the 30s timeout
+        for x in xrange(4):
+            uuid = 'fake-uuid-%s' % x
+            instance_map[uuid] = {'uuid': uuid, 'host': FLAGS.host,
+                                  'vm_state': vm_states.BUILDING,
+                                  'created_at': created_at}
+            instances.append(instance_map[uuid])
+
+        # one instance created just now: still within the timeout
+        uuid = 'fake-uuid-5'
+        instance_map[uuid] = {'uuid': uuid, 'host': FLAGS.host,
+                              'vm_state': vm_states.BUILDING,
+                              'created_at': utils.utcnow()}
+        instances.append(instance_map[uuid])
+
+        self.compute._check_instance_build_time(ctxt)
+        self.assertTrue(called['get_all'])
+        self.assertEqual(called['set_error_state'], 4)  # only the old ones
+
 
 class ComputeAPITestCase(BaseTestCase):
 
-- 
cgit