diff options
author | Philip Knouff <philip.knouff@mailtrust.com> | 2012-05-03 14:55:03 -0400 |
---|---|---|
committer | Mark Washenberger <mark.washenberger@rackspace.com> | 2012-05-16 13:53:14 -0400 |
commit | 7b75fe7f571dd95287307f9d1138fb476a6bf721 (patch) | |
tree | a338ddb5c65821d9dda12f4f7183eb56d17d4f2b | |
parent | e4d8b4824d0a1492271e262d46b1f8f464128fb8 (diff) | |
download | nova-7b75fe7f571dd95287307f9d1138fb476a6bf721.tar.gz nova-7b75fe7f571dd95287307f9d1138fb476a6bf721.tar.xz nova-7b75fe7f571dd95287307f9d1138fb476a6bf721.zip |
Optional timeout for servers stuck in build
Fixes bug 994786
Change-Id: Iae86c002073f45b48acde8eae07f9b0f62488f90
-rw-r--r-- | nova/compute/manager.py | 21 | ||||
-rw-r--r-- | nova/tests/test_compute.py | 104 |
2 files changed, 125 insertions, 0 deletions
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index e35ede5e4..3d42a36f0 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -96,6 +96,11 @@ compute_opts = [
                help="Automatically hard reboot an instance if it has been "
                     "stuck in a rebooting state longer than N seconds. "
                     "Set to 0 to disable."),
+    cfg.IntOpt("instance_build_timeout",
+               default=0,
+               help="Amount of time in seconds an instance can be in BUILD "
+                    "before going into ERROR status."
+                    "Set to 0 to disable."),
     cfg.IntOpt("rescue_timeout",
                default=0,
                help="Automatically unrescue an instance after N seconds. "
@@ -450,6 +455,22 @@ class ComputeManager(manager.SchedulerDependentManager):
             with excutils.save_and_reraise_exception():
                 self._set_instance_error_state(context, instance_uuid)
 
+    @manager.periodic_task
+    def _check_instance_build_time(self, context):
+        """Ensure that instances are not stuck in build."""
+        if FLAGS.instance_build_timeout == 0:
+            return
+
+        filters = {'vm_state': vm_states.BUILDING}
+        building_insts = self.db.instance_get_all_by_filters(context, filters)
+
+        for instance in building_insts:
+            if utils.is_older_than(instance['created_at'],
+                                   FLAGS.instance_build_timeout):
+                self._set_instance_error_state(context, instance['uuid'])
+                LOG.warn(_("Instance build timed out. Set to error state."),
+                         instance=instance)
+
     def _update_access_ip(self, context, instance, nw_info):
         """Update the access ip values for a given instance.
 
diff --git a/nova/tests/test_compute.py b/nova/tests/test_compute.py
index f806e7516..fd9c6a8c0 100644
--- a/nova/tests/test_compute.py
+++ b/nova/tests/test_compute.py
@@ -1873,6 +1873,110 @@ class ComputeTestCase(BaseTestCase):
         for uuid, status in expected_migration_status.iteritems():
             self.assertEqual(status, fetch_instance_migration_status(uuid))
 
+    def test_instance_build_timeout_disabled(self):
+        self.flags(instance_build_timeout=0)
+        ctxt = context.get_admin_context()
+        called = {'get_all': False, 'set_error_state': 0}
+        created_at = utils.utcnow() + datetime.timedelta(seconds=-60)
+
+        def fake_instance_get_all_by_filters(*args, **kwargs):
+            called['get_all'] = True
+            return instances[:]
+
+        self.stubs.Set(db, 'instance_get_all_by_filters',
+                       fake_instance_get_all_by_filters)
+
+        def fake_set_instance_error_state(_ctxt, instance_uuid, **kwargs):
+            called['set_error_state'] += 1
+
+        self.stubs.Set(self.compute, '_set_instance_error_state',
+                       fake_set_instance_error_state)
+
+        instance_map = {}
+        instances = []
+        for x in xrange(5):
+            uuid = 'fake-uuid-%s' % x
+            instance_map[uuid] = {'uuid': uuid, 'host': FLAGS.host,
+                    'vm_state': vm_states.BUILDING,
+                    'created_at': created_at}
+            instances.append(instance_map[uuid])
+
+        self.compute._check_instance_build_time(ctxt)
+        self.assertFalse(called['get_all'])
+        self.assertEqual(called['set_error_state'], 0)
+
+    def test_instance_build_timeout(self):
+        self.flags(instance_build_timeout=30)
+        ctxt = context.get_admin_context()
+        called = {'get_all': False, 'set_error_state': 0}
+        created_at = utils.utcnow() + datetime.timedelta(seconds=-60)
+
+        def fake_instance_get_all_by_filters(*args, **kwargs):
+            called['get_all'] = True
+            return instances[:]
+
+        self.stubs.Set(db, 'instance_get_all_by_filters',
+                       fake_instance_get_all_by_filters)
+
+        def fake_set_instance_error_state(_ctxt, instance_uuid, **kwargs):
+            called['set_error_state'] += 1
+
+        self.stubs.Set(self.compute, '_set_instance_error_state',
+                       fake_set_instance_error_state)
+
+        instance_map = {}
+        instances = []
+        for x in xrange(5):
+            uuid = 'fake-uuid-%s' % x
+            instance_map[uuid] = {'uuid': uuid, 'host': FLAGS.host,
+                    'vm_state': vm_states.BUILDING,
+                    'created_at': created_at}
+            instances.append(instance_map[uuid])
+
+        self.compute._check_instance_build_time(ctxt)
+        self.assertTrue(called['get_all'])
+        self.assertEqual(called['set_error_state'], 5)
+
+    def test_instance_build_timeout_mixed_instances(self):
+        self.flags(instance_build_timeout=30)
+        ctxt = context.get_admin_context()
+        called = {'get_all': False, 'set_error_state': 0}
+        created_at = utils.utcnow() + datetime.timedelta(seconds=-60)
+
+        def fake_instance_get_all_by_filters(*args, **kwargs):
+            called['get_all'] = True
+            return instances[:]
+
+        self.stubs.Set(db, 'instance_get_all_by_filters',
+                       fake_instance_get_all_by_filters)
+
+        def fake_set_instance_error_state(_ctxt, instance_uuid, **kwargs):
+            called['set_error_state'] += 1
+
+        self.stubs.Set(self.compute, '_set_instance_error_state',
+                       fake_set_instance_error_state)
+
+        instance_map = {}
+        instances = []
+        #expired instances
+        for x in xrange(4):
+            uuid = 'fake-uuid-%s' % x
+            instance_map[uuid] = {'uuid': uuid, 'host': FLAGS.host,
+                    'vm_state': vm_states.BUILDING,
+                    'created_at': created_at}
+            instances.append(instance_map[uuid])
+
+        #not expired
+        uuid = 'fake-uuid-5'
+        instance_map[uuid] = {'uuid': uuid, 'host': FLAGS.host,
+                'vm_state': vm_states.BUILDING,
+                'created_at': utils.utcnow()}
+        instances.append(instance_map[uuid])
+
+        self.compute._check_instance_build_time(ctxt)
+        self.assertTrue(called['get_all'])
+        self.assertEqual(called['set_error_state'], 4)
+
 
 class ComputeAPITestCase(BaseTestCase):
 