summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Philip Knouff <philip.knouff@mailtrust.com>, 2012-05-03 14:55:03 -0400
committer: Mark Washenberger <mark.washenberger@rackspace.com>, 2012-05-16 13:53:14 -0400
commit: 7b75fe7f571dd95287307f9d1138fb476a6bf721 (patch)
tree: a338ddb5c65821d9dda12f4f7183eb56d17d4f2b
parent: e4d8b4824d0a1492271e262d46b1f8f464128fb8 (diff)
download: nova-7b75fe7f571dd95287307f9d1138fb476a6bf721.tar.gz
nova-7b75fe7f571dd95287307f9d1138fb476a6bf721.tar.xz
nova-7b75fe7f571dd95287307f9d1138fb476a6bf721.zip
Optional timeout for servers stuck in build

Fixes bug 994786

Change-Id: Iae86c002073f45b48acde8eae07f9b0f62488f90
-rw-r--r-- nova/compute/manager.py 21
-rw-r--r-- nova/tests/test_compute.py 104
2 files changed, 125 insertions, 0 deletions
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index e35ede5e4..3d42a36f0 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -96,6 +96,11 @@ compute_opts = [
help="Automatically hard reboot an instance if it has been "
"stuck in a rebooting state longer than N seconds. "
"Set to 0 to disable."),
+    # Build watchdog threshold; the default of 0 leaves the check disabled.
+    cfg.IntOpt("instance_build_timeout",
+               default=0,
+               help="Amount of time in seconds an instance can be in BUILD "
+                    "before going into ERROR status. "
+                    "Set to 0 to disable."),
cfg.IntOpt("rescue_timeout",
default=0,
help="Automatically unrescue an instance after N seconds. "
@@ -450,6 +455,22 @@ class ComputeManager(manager.SchedulerDependentManager):
with excutils.save_and_reraise_exception():
self._set_instance_error_state(context, instance_uuid)
+    @manager.periodic_task
+    def _check_instance_build_time(self, context):
+        """Move instances that have been building for too long into ERROR.
+
+        Does nothing when the ``instance_build_timeout`` flag is 0.
+        Otherwise every instance whose vm_state is BUILDING and whose
+        ``created_at`` is older than the flag value (in seconds) is put
+        into the error state and a warning is logged for it.
+        """
+        timeout = FLAGS.instance_build_timeout
+        if timeout == 0:
+            return
+
+        candidates = self.db.instance_get_all_by_filters(
+                context, {'vm_state': vm_states.BUILDING})
+
+        for inst in candidates:
+            # Still inside the allowed build window: leave it alone.
+            if not utils.is_older_than(inst['created_at'], timeout):
+                continue
+            self._set_instance_error_state(context, inst['uuid'])
+            LOG.warn(_("Instance build timed out. Set to error state."),
+                     instance=inst)
+
def _update_access_ip(self, context, instance, nw_info):
"""Update the access ip values for a given instance.
diff --git a/nova/tests/test_compute.py b/nova/tests/test_compute.py
index f806e7516..fd9c6a8c0 100644
--- a/nova/tests/test_compute.py
+++ b/nova/tests/test_compute.py
@@ -1873,6 +1873,110 @@ class ComputeTestCase(BaseTestCase):
for uuid, status in expected_migration_status.iteritems():
self.assertEqual(status, fetch_instance_migration_status(uuid))
+    def test_instance_build_timeout_disabled(self):
+        """With instance_build_timeout=0 the periodic check is a no-op.
+
+        Neither the instance lookup nor the error-state setter may be
+        called, even though the fake instances are 60 seconds old.
+        """
+        self.flags(instance_build_timeout=0)
+        ctxt = context.get_admin_context()
+        called = {'get_all': False, 'set_error_state': 0}
+        created_at = utils.utcnow() - datetime.timedelta(seconds=60)
+
+        # Five BUILDING instances, all created 60 seconds ago.
+        instances = [{'uuid': 'fake-uuid-%s' % x, 'host': FLAGS.host,
+                      'vm_state': vm_states.BUILDING,
+                      'created_at': created_at}
+                     for x in xrange(5)]
+
+        def fake_instance_get_all_by_filters(*args, **kwargs):
+            called['get_all'] = True
+            return instances[:]
+
+        def fake_set_instance_error_state(_ctxt, instance_uuid, **kwargs):
+            called['set_error_state'] += 1
+
+        self.stubs.Set(db, 'instance_get_all_by_filters',
+                       fake_instance_get_all_by_filters)
+        self.stubs.Set(self.compute, '_set_instance_error_state',
+                       fake_set_instance_error_state)
+
+        self.compute._check_instance_build_time(ctxt)
+        self.assertFalse(called['get_all'])
+        self.assertEqual(called['set_error_state'], 0)
+
+    def test_instance_build_timeout(self):
+        """Every BUILDING instance older than the timeout is set to error.
+
+        The timeout is 30 seconds and all five fake instances were created
+        60 seconds ago, so the error-state setter must run five times.
+        """
+        self.flags(instance_build_timeout=30)
+        ctxt = context.get_admin_context()
+        called = {'get_all': False, 'set_error_state': 0}
+        created_at = utils.utcnow() - datetime.timedelta(seconds=60)
+
+        # Five BUILDING instances, all past the 30 second limit.
+        instances = [{'uuid': 'fake-uuid-%s' % x, 'host': FLAGS.host,
+                      'vm_state': vm_states.BUILDING,
+                      'created_at': created_at}
+                     for x in xrange(5)]
+
+        def fake_instance_get_all_by_filters(*args, **kwargs):
+            called['get_all'] = True
+            return instances[:]
+
+        def fake_set_instance_error_state(_ctxt, instance_uuid, **kwargs):
+            called['set_error_state'] += 1
+
+        self.stubs.Set(db, 'instance_get_all_by_filters',
+                       fake_instance_get_all_by_filters)
+        self.stubs.Set(self.compute, '_set_instance_error_state',
+                       fake_set_instance_error_state)
+
+        self.compute._check_instance_build_time(ctxt)
+        self.assertTrue(called['get_all'])
+        self.assertEqual(called['set_error_state'], 5)
+
+    def test_instance_build_timeout_mixed_instances(self):
+        """Only instances older than the timeout are set to error.
+
+        The timeout is 30 seconds. Four fake instances were created 60
+        seconds ago and one was created just now, so the error-state
+        setter must run exactly four times.
+        """
+        self.flags(instance_build_timeout=30)
+        ctxt = context.get_admin_context()
+        called = {'get_all': False, 'set_error_state': 0}
+        created_at = utils.utcnow() - datetime.timedelta(seconds=60)
+
+        # Expired: created 60 seconds ago, past the 30 second limit.
+        expired = [{'uuid': 'fake-uuid-%s' % x, 'host': FLAGS.host,
+                    'vm_state': vm_states.BUILDING,
+                    'created_at': created_at}
+                   for x in xrange(4)]
+
+        # Not expired: created just now.
+        fresh = {'uuid': 'fake-uuid-5', 'host': FLAGS.host,
+                 'vm_state': vm_states.BUILDING,
+                 'created_at': utils.utcnow()}
+
+        instances = expired + [fresh]
+
+        def fake_instance_get_all_by_filters(*args, **kwargs):
+            called['get_all'] = True
+            return instances[:]
+
+        def fake_set_instance_error_state(_ctxt, instance_uuid, **kwargs):
+            called['set_error_state'] += 1
+
+        self.stubs.Set(db, 'instance_get_all_by_filters',
+                       fake_instance_get_all_by_filters)
+        self.stubs.Set(self.compute, '_set_instance_error_state',
+                       fake_set_instance_error_state)
+
+        self.compute._check_instance_build_time(ctxt)
+        self.assertTrue(called['get_all'])
+        self.assertEqual(called['set_error_state'], 4)
+
class ComputeAPITestCase(BaseTestCase):