From 21e08712d9ac5577c27e7ea4c9271372bc0bd3ed Mon Sep 17 00:00:00 2001
From: "Kevin L. Mitchell"
Date: Mon, 21 Nov 2011 14:39:22 -0600
Subject: Put instances in ERROR state when scheduler fails.

When the scheduler's selected driver method raises an exception, such
as NoValidHost, any affected instance must be placed into the ERROR
state. This is done by catching exceptions raised in _schedule() and,
if 'instance_id' is present in kwargs, moving the identified instance
to the ERROR state. This fixes bug 886289.

Change-Id: I5c73549e073493701b86658569823b9bc161291d
---
 nova/scheduler/manager.py              | 15 ++++++++++++++-
 nova/tests/scheduler/test_scheduler.py | 18 ++++++++++++++++++
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/nova/scheduler/manager.py b/nova/scheduler/manager.py
index 0f973341e..9d4d03b13 100644
--- a/nova/scheduler/manager.py
+++ b/nova/scheduler/manager.py
@@ -23,6 +23,7 @@ Scheduler Service
 
 import functools
 
+from nova.compute import vm_states
 from nova import db
 from nova import flags
 from nova import log as logging
@@ -97,7 +98,19 @@ class SchedulerManager(manager.Manager):
             args = (context, topic, method) + args
 
         # Scheduler methods are responsible for casting.
-        return real_meth(*args, **kwargs)
+        try:
+            return real_meth(*args, **kwargs)
+        except Exception as e:
+            # If this affects a particular instance, move that
+            # instance to the ERROR state
+            if 'instance_id' in kwargs:
+                instance_id = kwargs['instance_id']
+                LOG.warning(_("Failed to %(driver_method)s: %(e)s. "
+                              "Putting instance %(instance_id)s into "
+                              "ERROR state.") % locals())
+                db.instance_update(context, kwargs['instance_id'],
+                                   dict(vm_state=vm_states.ERROR))
+            raise
 
     # NOTE (masumotok) : This method should be moved to nova.api.ec2.admin.
     # Based on bexar design summit discussion,
diff --git a/nova/tests/scheduler/test_scheduler.py b/nova/tests/scheduler/test_scheduler.py
index 2c32bbd94..05f954b73 100644
--- a/nova/tests/scheduler/test_scheduler.py
+++ b/nova/tests/scheduler/test_scheduler.py
@@ -132,6 +132,9 @@ class TestDriver(driver.Scheduler):
         method = 'named_method'
         driver.cast_to_host(context, topic, host, method, num=num)
 
+    def schedule_failing_method(self, context, instance_id):
+        raise exception.NoValidHost(reason="")
+
 
 class SchedulerTestCase(test.TestCase):
     """Test case for scheduler"""
@@ -244,6 +247,21 @@ class SchedulerTestCase(test.TestCase):
         db.instance_destroy(ctxt, i_ref1['id'])
         db.instance_destroy(ctxt, i_ref2['id'])
 
+    def test_exception_puts_instance_in_error_state(self):
+        """Test that an exception from the scheduler puts an instance
+        in the ERROR state."""
+
+        scheduler = manager.SchedulerManager()
+        ctxt = context.get_admin_context()
+        inst = _create_instance()
+        self.assertRaises(Exception, scheduler._schedule,
+                          'failing_method', ctxt, 'scheduler',
+                          instance_id=inst['uuid'])
+
+        # Refresh the instance
+        inst = db.instance_get(ctxt, inst['id'])
+        self.assertEqual(inst['vm_state'], vm_states.ERROR)
+
 
 class SimpleDriverTestCase(test.TestCase):
     """Test case for simple driver"""
-- 
cgit