diff options
| author | Jenkins <jenkins@review.openstack.org> | 2013-01-02 16:42:48 +0000 |
|---|---|---|
| committer | Gerrit Code Review <review@openstack.org> | 2013-01-02 16:42:48 +0000 |
| commit | edf4df3f6fa64f24d3352d69bfdd0e47bbea9f31 (patch) | |
| tree | 1ed27e58897b0bcb566d7bed099afec8009f8f8a | |
| parent | 04e4212849d614e0c49eeda33bbbe43d215f9301 (diff) | |
| parent | 9f01b105fa3f240d9cae88a5b305b4cc312e95f4 (diff) | |
| download | nova-edf4df3f6fa64f24d3352d69bfdd0e47bbea9f31.tar.gz nova-edf4df3f6fa64f24d3352d69bfdd0e47bbea9f31.tar.xz nova-edf4df3f6fa64f24d3352d69bfdd0e47bbea9f31.zip | |
Merge "Log last compute error when rescheduling."
| -rw-r--r-- | nova/compute/manager.py | 11 | ||||
| -rw-r--r-- | nova/scheduler/filter_scheduler.py | 21 | ||||
| -rw-r--r-- | nova/tests/compute/test_compute.py | 25 |
3 files changed, 46 insertions, 11 deletions
diff --git a/nova/compute/manager.py b/nova/compute/manager.py index ccd82d3f0..9e4ac301e 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -674,7 +674,7 @@ class ComputeManager(manager.SchedulerDependentManager): rescheduled = self._reschedule(context, request_spec, filter_properties, instance['uuid'], self.scheduler_rpcapi.run_instance, method_args, - task_state) + task_state, exc_info) except Exception: rescheduled = False @@ -689,7 +689,8 @@ class ComputeManager(manager.SchedulerDependentManager): raise exc_info[0], exc_info[1], exc_info[2] def _reschedule(self, context, request_spec, filter_properties, - instance_uuid, scheduler_method, method_args, task_state): + instance_uuid, scheduler_method, method_args, task_state, + exc_info=None): """Attempt to re-schedule a compute operation.""" retry = filter_properties.get('retry', None) @@ -713,6 +714,10 @@ class ComputeManager(manager.SchedulerDependentManager): # reset the task state: self._instance_update(context, instance_uuid, task_state=task_state) + if exc_info: + # stringify to avoid circular ref problem in json serialization: + retry['exc'] = traceback.format_exception(*exc_info) + scheduler_method(context, *method_args) return True @@ -1870,7 +1875,7 @@ class ComputeManager(manager.SchedulerDependentManager): rescheduled = self._reschedule(context, request_spec, filter_properties, instance_uuid, scheduler_method, - method_args, task_state) + method_args, task_state, exc_info) except Exception: rescheduled = False LOG.exception(_("Error trying to reschedule"), diff --git a/nova/scheduler/filter_scheduler.py b/nova/scheduler/filter_scheduler.py index c18daa4cc..ea9a39b6f 100644 --- a/nova/scheduler/filter_scheduler.py +++ b/nova/scheduler/filter_scheduler.py @@ -191,6 +191,23 @@ class FilterScheduler(driver.Scheduler): "'scheduler_max_attempts', must be >= 1")) return max_attempts + def _log_compute_error(self, instance_uuid, retry): + """If the request contained an exception from a 
previous compute + build/resize operation, log it to aid debugging + """ + exc = retry.pop('exc', None) # string-ified exception from compute + if not exc: + return # no exception info from a previous attempt, skip + + hosts = retry.get('hosts', None) + if not hosts: + return # no previously attempted hosts, skip + + last_host, last_node = hosts[-1] + msg = _("Error from last host: %(last_host)s (node %(last_node)s): " + "%(exc)s") % locals() + LOG.error(msg, instance_uuid=instance_uuid) + def _populate_retry(self, filter_properties, instance_properties): """Populate filter properties with history of retries for this request. If maximum retries is exceeded, raise NoValidHost. @@ -212,8 +229,10 @@ class FilterScheduler(driver.Scheduler): } filter_properties['retry'] = retry + instance_uuid = instance_properties.get('uuid') + self._log_compute_error(instance_uuid, retry) + if retry['num_attempts'] > max_attempts: - instance_uuid = instance_properties.get('uuid') msg = _("Exceeded max scheduling attempts %(max_attempts)d for " "instance %(instance_uuid)s") % locals() raise exception.NoValidHost(reason=msg) diff --git a/nova/tests/compute/test_compute.py b/nova/tests/compute/test_compute.py index 3983dc8bb..8263529b1 100644 --- a/nova/tests/compute/test_compute.py +++ b/nova/tests/compute/test_compute.py @@ -23,6 +23,7 @@ import copy import datetime import sys import time +import traceback import uuid import mox @@ -5924,7 +5925,8 @@ class ComputeReschedulingTestCase(BaseTestCase): self.updated_task_state = kwargs.get('task_state') self.stubs.Set(self.compute, '_instance_update', fake_update) - def _reschedule(self, request_spec=None, filter_properties=None): + def _reschedule(self, request_spec=None, filter_properties=None, + exc_info=None): if not filter_properties: filter_properties = {} @@ -5940,7 +5942,7 @@ class ComputeReschedulingTestCase(BaseTestCase): requested_networks, is_first_time, filter_properties) return self.compute._reschedule(self.context, 
request_spec, filter_properties, instance_uuid, scheduler_method, - method_args, self.expected_task_state) + method_args, self.expected_task_state, exc_info=exc_info) def test_reschedule_no_filter_properties(self): """no filter_properties will disable re-scheduling""" @@ -5961,10 +5963,17 @@ class ComputeReschedulingTestCase(BaseTestCase): retry = dict(num_attempts=1) filter_properties = dict(retry=retry) request_spec = {'instance_uuids': ['foo', 'bar']} + try: + raise test.TestingException("just need an exception") + except test.TestingException: + exc_info = sys.exc_info() + exc_str = traceback.format_exception(*exc_info) + self.assertTrue(self._reschedule(filter_properties=filter_properties, - request_spec=request_spec)) + request_spec=request_spec, exc_info=exc_info)) self.assertEqual(1, len(request_spec['instance_uuids'])) self.assertEqual(self.updated_task_state, self.expected_task_state) + self.assertEqual(exc_str, filter_properties['retry']['exc']) class ComputeReschedulingResizeTestCase(ComputeReschedulingTestCase): @@ -5974,7 +5983,8 @@ class ComputeReschedulingResizeTestCase(ComputeReschedulingTestCase): super(ComputeReschedulingResizeTestCase, self).setUp() self.expected_task_state = task_states.RESIZE_PREP - def _reschedule(self, request_spec=None, filter_properties=None): + def _reschedule(self, request_spec=None, filter_properties=None, + exc_info=None): if not filter_properties: filter_properties = {} @@ -5991,7 +6001,7 @@ class ComputeReschedulingResizeTestCase(ComputeReschedulingTestCase): return self.compute._reschedule(self.context, request_spec, filter_properties, instance_uuid, scheduler_method, - method_args, self.expected_task_state) + method_args, self.expected_task_state, exc_info=exc_info) class InnerTestingException(Exception): @@ -6111,7 +6121,8 @@ class ComputeRescheduleOrReraiseTestCase(BaseTestCase): self.instance) self.compute._reschedule(self.context, None, {}, instance_uuid, self.compute.scheduler_rpcapi.run_instance, - 
method_args, task_states.SCHEDULING).AndReturn(True) + method_args, task_states.SCHEDULING, exc_info).AndReturn( + True) self.compute._log_original_error(exc_info, instance_uuid) self.mox.ReplayAll() @@ -6205,7 +6216,7 @@ class ComputeRescheduleResizeOrReraiseTestCase(BaseTestCase): self.compute._reschedule(self.context, {}, {}, self.instance_uuid, self.compute.scheduler_rpcapi.prep_resize, method_args, - task_states.RESIZE_PREP).AndReturn(True) + task_states.RESIZE_PREP, exc_info).AndReturn(True) self.compute._log_original_error(exc_info, self.instance_uuid) self.mox.ReplayAll() |
