From 6afae496d2314ca6900f6f9709d093aa6bb7f29d Mon Sep 17 00:00:00 2001 From: Brian Elliott Date: Wed, 2 Jan 2013 16:35:23 +0000 Subject: Add compute build/resize errors to instance faults Save exception during build and resize operations in instance faults. This will make it easier to see the individual compute errors that occurred when an operation got re-scheduled. Change-Id: I4224b9638aa9b7572fc0ee1ef6fa3e6654b84805 --- nova/compute/manager.py | 22 +++++++++++------- nova/tests/compute/test_compute.py | 47 ++++++++++++++++++++++++-------------- 2 files changed, 44 insertions(+), 25 deletions(-) diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 9e4ac301e..6bf171635 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -626,8 +626,9 @@ class ComputeManager(manager.SchedulerDependentManager): LOG.exception(msg, instance=instance) raise except Exception: + exc_info = sys.exc_info() # try to re-schedule instance: - self._reschedule_or_reraise(context, instance, + self._reschedule_or_reraise(context, instance, exc_info, requested_networks, admin_password, injected_files, is_first_time, request_spec, filter_properties) else: @@ -649,16 +650,18 @@ class ComputeManager(manager.SchedulerDependentManager): traceback.format_exception(type_, value, tb), instance_uuid=instance_uuid) - def _reschedule_or_reraise(self, context, instance, requested_networks, - admin_password, injected_files, is_first_time, - request_spec, filter_properties): + def _reschedule_or_reraise(self, context, instance, exc_info, + requested_networks, admin_password, injected_files, is_first_time, + request_spec, filter_properties): """Try to re-schedule the build or re-raise the original build error to error out the instance. 
""" - exc_info = sys.exc_info() instance_uuid = instance['uuid'] rescheduled = False + compute_utils.add_instance_fault_from_exc(context, instance_uuid, + exc_info[0], exc_info=exc_info) + try: self._deallocate_network(context, instance) except Exception: @@ -1841,8 +1844,9 @@ class ComputeManager(manager.SchedulerDependentManager): reservations, request_spec, filter_properties, node) except Exception: # try to re-schedule the resize elsewhere: + exc_info = sys.exc_info() self._reschedule_resize_or_reraise(context, image, instance, - instance_type, reservations, request_spec, + exc_info, instance_type, reservations, request_spec, filter_properties) finally: extra_usage_info = dict( @@ -1853,7 +1857,7 @@ class ComputeManager(manager.SchedulerDependentManager): context, instance, "resize.prep.end", extra_usage_info=extra_usage_info) - def _reschedule_resize_or_reraise(self, context, image, instance, + def _reschedule_resize_or_reraise(self, context, image, instance, exc_info, instance_type, reservations, request_spec, filter_properties): """Try to re-schedule the resize or re-raise the original error to error out the instance. 
@@ -1863,10 +1867,12 @@ class ComputeManager(manager.SchedulerDependentManager): if not filter_properties: filter_properties = {} - exc_info = sys.exc_info() rescheduled = False instance_uuid = instance['uuid'] + compute_utils.add_instance_fault_from_exc(context, instance_uuid, + exc_info[0], exc_info=exc_info) + try: scheduler_method = self.scheduler_rpcapi.prep_resize method_args = (instance, instance_type, image, request_spec, diff --git a/nova/tests/compute/test_compute.py b/nova/tests/compute/test_compute.py index 8263529b1..48c16c603 100644 --- a/nova/tests/compute/test_compute.py +++ b/nova/tests/compute/test_compute.py @@ -6027,7 +6027,7 @@ class ComputeRescheduleOrReraiseTestCase(BaseTestCase): self.compute._spawn(mox.IgnoreArg(), self.instance, None, None, None, False, None).AndRaise(test.TestingException("BuildError")) self.compute._reschedule_or_reraise(mox.IgnoreArg(), self.instance, - None, None, None, False, None, {}) + mox.IgnoreArg(), None, None, None, False, None, {}) self.mox.ReplayAll() self.compute._run_instance(self.context, None, {}, None, None, None, @@ -6045,6 +6045,8 @@ class ComputeRescheduleOrReraiseTestCase(BaseTestCase): except Exception: exc_info = sys.exc_info() + compute_utils.add_instance_fault_from_exc(self.context, + instance_uuid, exc_info[0], exc_info=exc_info) self.compute._deallocate_network(self.context, self.instance).AndRaise(InnerTestingException("Error")) self.compute._log_original_error(exc_info, instance_uuid) @@ -6055,7 +6057,7 @@ class ComputeRescheduleOrReraiseTestCase(BaseTestCase): # error: self.assertRaises(InnerTestingException, self.compute._reschedule_or_reraise, self.context, - self.instance, None, None, None, False, None, {}) + self.instance, exc_info, None, None, None, False, None, {}) def test_reschedule_fail(self): """Test handling of exception from _reschedule""" @@ -6077,9 +6079,10 @@ class ComputeRescheduleOrReraiseTestCase(BaseTestCase): raise test.TestingException("Original") except Exception: # not 
re-scheduling, should raise the original build error: + exc_info = sys.exc_info() self.assertRaises(test.TestingException, self.compute._reschedule_or_reraise, self.context, - self.instance, None, None, None, False, None, {}) + self.instance, exc_info, None, None, None, False, None, {}) def test_reschedule_false(self): """Test not-rescheduling, but no nested exception""" @@ -6088,22 +6091,25 @@ class ComputeRescheduleOrReraiseTestCase(BaseTestCase): self.mox.StubOutWithMock(self.compute, '_deallocate_network') self.mox.StubOutWithMock(self.compute, '_reschedule') - self.compute._deallocate_network(self.context, - self.instance) - self.compute._reschedule(self.context, None, instance_uuid, - {}, self.compute.scheduler_rpcapi.run_instance, method_args, - task_states.SCHEDULING).AndReturn(False) - - self.mox.ReplayAll() - try: raise test.TestingException("Original") except Exception: + exc_info = sys.exc_info() + compute_utils.add_instance_fault_from_exc(self.context, + instance_uuid, exc_info[0], exc_info=exc_info) + self.compute._deallocate_network(self.context, + self.instance) + self.compute._reschedule(self.context, None, {}, instance_uuid, + self.compute.scheduler_rpcapi.run_instance, method_args, + task_states.SCHEDULING, exc_info).AndReturn(False) + + self.mox.ReplayAll() + # re-scheduling is False, the original build error should be # raised here: self.assertRaises(test.TestingException, self.compute._reschedule_or_reraise, self.context, - self.instance, None, None, None, False, None, {}) + self.instance, exc_info, None, None, None, False, None, {}) def test_reschedule_true(self): """Test behavior when re-scheduling happens""" @@ -6117,6 +6123,8 @@ class ComputeRescheduleOrReraiseTestCase(BaseTestCase): except Exception: exc_info = sys.exc_info() + compute_utils.add_instance_fault_from_exc(self.context, + instance_uuid, exc_info[0], exc_info=exc_info) self.compute._deallocate_network(self.context, self.instance) self.compute._reschedule(self.context, None, 
{}, instance_uuid, @@ -6130,7 +6138,7 @@ class ComputeRescheduleOrReraiseTestCase(BaseTestCase): # re-scheduling is True, original error is logged, but nothing # is raised: self.compute._reschedule_or_reraise(self.context, self.instance, - None, None, None, False, None, {}) + exc_info, None, None, None, False, None, {}) class ComputeRescheduleResizeOrReraiseTestCase(BaseTestCase): @@ -6155,7 +6163,8 @@ class ComputeRescheduleResizeOrReraiseTestCase(BaseTestCase): mox.IgnoreArg()).AndRaise(test.TestingException("Original")) self.compute._reschedule_resize_or_reraise(mox.IgnoreArg(), None, - self.instance, self.instance_type, None, None, None) + self.instance, mox.IgnoreArg(), self.instance_type, None, None, + None) self.mox.ReplayAll() @@ -6179,9 +6188,11 @@ class ComputeRescheduleResizeOrReraiseTestCase(BaseTestCase): try: raise test.TestingException("Original") except Exception: + exc_info = sys.exc_info() self.assertRaises(test.TestingException, self.compute._reschedule_resize_or_reraise, self.context, - None, self.instance, self.instance_type, None, {}, {}) + None, self.instance, exc_info, self.instance_type, None, + {}, {}) def test_reschedule_false(self): """Original exception should be raised if the resize is not @@ -6199,9 +6210,11 @@ class ComputeRescheduleResizeOrReraiseTestCase(BaseTestCase): try: raise test.TestingException("Original") except Exception: + exc_info = sys.exc_info() self.assertRaises(test.TestingException, self.compute._reschedule_resize_or_reraise, self.context, - None, self.instance, self.instance_type, None, {}, {}) + None, self.instance, exc_info, self.instance_type, None, + {}, {}) def test_reschedule_true(self): """If rescheduled, the original resize exception should be logged""" @@ -6222,7 +6235,7 @@ class ComputeRescheduleResizeOrReraiseTestCase(BaseTestCase): self.mox.ReplayAll() self.compute._reschedule_resize_or_reraise(self.context, None, - self.instance, self.instance_type, None, {}, {}) + self.instance, exc_info, 
self.instance_type, None, {}, {}) class ComputeInactiveImageTestCase(BaseTestCase): -- cgit