summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jenkins <jenkins@review.openstack.org> 2013-01-02 16:42:48 +0000
committer: Gerrit Code Review <review@openstack.org> 2013-01-02 16:42:48 +0000
commit: edf4df3f6fa64f24d3352d69bfdd0e47bbea9f31 (patch)
tree: 1ed27e58897b0bcb566d7bed099afec8009f8f8a
parent: 04e4212849d614e0c49eeda33bbbe43d215f9301 (diff)
parent: 9f01b105fa3f240d9cae88a5b305b4cc312e95f4 (diff)
download: nova-edf4df3f6fa64f24d3352d69bfdd0e47bbea9f31.tar.gz
nova-edf4df3f6fa64f24d3352d69bfdd0e47bbea9f31.tar.xz
nova-edf4df3f6fa64f24d3352d69bfdd0e47bbea9f31.zip
Merge "Log last compute error when rescheduling."
-rw-r--r-- nova/compute/manager.py 11
-rw-r--r-- nova/scheduler/filter_scheduler.py 21
-rw-r--r-- nova/tests/compute/test_compute.py 25
3 files changed, 46 insertions, 11 deletions
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index ccd82d3f0..9e4ac301e 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -674,7 +674,7 @@ class ComputeManager(manager.SchedulerDependentManager):
rescheduled = self._reschedule(context, request_spec,
filter_properties, instance['uuid'],
self.scheduler_rpcapi.run_instance, method_args,
- task_state)
+ task_state, exc_info)
except Exception:
rescheduled = False
@@ -689,7 +689,8 @@ class ComputeManager(manager.SchedulerDependentManager):
raise exc_info[0], exc_info[1], exc_info[2]
def _reschedule(self, context, request_spec, filter_properties,
- instance_uuid, scheduler_method, method_args, task_state):
+ instance_uuid, scheduler_method, method_args, task_state,
+ exc_info=None):
"""Attempt to re-schedule a compute operation."""
retry = filter_properties.get('retry', None)
@@ -713,6 +714,10 @@ class ComputeManager(manager.SchedulerDependentManager):
# reset the task state:
self._instance_update(context, instance_uuid, task_state=task_state)
+ if exc_info:
+ # stringify to avoid circular ref problem in json serialization:
+ retry['exc'] = traceback.format_exception(*exc_info)
+
scheduler_method(context, *method_args)
return True
@@ -1870,7 +1875,7 @@ class ComputeManager(manager.SchedulerDependentManager):
rescheduled = self._reschedule(context, request_spec,
filter_properties, instance_uuid, scheduler_method,
- method_args, task_state)
+ method_args, task_state, exc_info)
except Exception:
rescheduled = False
LOG.exception(_("Error trying to reschedule"),
diff --git a/nova/scheduler/filter_scheduler.py b/nova/scheduler/filter_scheduler.py
index c18daa4cc..ea9a39b6f 100644
--- a/nova/scheduler/filter_scheduler.py
+++ b/nova/scheduler/filter_scheduler.py
@@ -191,6 +191,23 @@ class FilterScheduler(driver.Scheduler):
"'scheduler_max_attempts', must be >= 1"))
return max_attempts
+ def _log_compute_error(self, instance_uuid, retry):
+ """If the request contained an exception from a previous compute
+ build/resize operation, log it to aid debugging
+ """
+ exc = retry.pop('exc', None) # string-ified exception from compute
+ if not exc:
+ return # no exception info from a previous attempt, skip
+
+ hosts = retry.get('hosts', None)
+ if not hosts:
+ return # no previously attempted hosts, skip
+
+ last_host, last_node = hosts[-1]
+ msg = _("Error from last host: %(last_host)s (node %(last_node)s): "
+ "%(exc)s") % locals()
+ LOG.error(msg, instance_uuid=instance_uuid)
+
def _populate_retry(self, filter_properties, instance_properties):
"""Populate filter properties with history of retries for this
request. If maximum retries is exceeded, raise NoValidHost.
@@ -212,8 +229,10 @@ class FilterScheduler(driver.Scheduler):
}
filter_properties['retry'] = retry
+ instance_uuid = instance_properties.get('uuid')
+ self._log_compute_error(instance_uuid, retry)
+
if retry['num_attempts'] > max_attempts:
- instance_uuid = instance_properties.get('uuid')
msg = _("Exceeded max scheduling attempts %(max_attempts)d for "
"instance %(instance_uuid)s") % locals()
raise exception.NoValidHost(reason=msg)
diff --git a/nova/tests/compute/test_compute.py b/nova/tests/compute/test_compute.py
index 3983dc8bb..8263529b1 100644
--- a/nova/tests/compute/test_compute.py
+++ b/nova/tests/compute/test_compute.py
@@ -23,6 +23,7 @@ import copy
import datetime
import sys
import time
+import traceback
import uuid
import mox
@@ -5924,7 +5925,8 @@ class ComputeReschedulingTestCase(BaseTestCase):
self.updated_task_state = kwargs.get('task_state')
self.stubs.Set(self.compute, '_instance_update', fake_update)
- def _reschedule(self, request_spec=None, filter_properties=None):
+ def _reschedule(self, request_spec=None, filter_properties=None,
+ exc_info=None):
if not filter_properties:
filter_properties = {}
@@ -5940,7 +5942,7 @@ class ComputeReschedulingTestCase(BaseTestCase):
requested_networks, is_first_time, filter_properties)
return self.compute._reschedule(self.context, request_spec,
filter_properties, instance_uuid, scheduler_method,
- method_args, self.expected_task_state)
+ method_args, self.expected_task_state, exc_info=exc_info)
def test_reschedule_no_filter_properties(self):
"""no filter_properties will disable re-scheduling"""
@@ -5961,10 +5963,17 @@ class ComputeReschedulingTestCase(BaseTestCase):
retry = dict(num_attempts=1)
filter_properties = dict(retry=retry)
request_spec = {'instance_uuids': ['foo', 'bar']}
+ try:
+ raise test.TestingException("just need an exception")
+ except test.TestingException:
+ exc_info = sys.exc_info()
+ exc_str = traceback.format_exception(*exc_info)
+
self.assertTrue(self._reschedule(filter_properties=filter_properties,
- request_spec=request_spec))
+ request_spec=request_spec, exc_info=exc_info))
self.assertEqual(1, len(request_spec['instance_uuids']))
self.assertEqual(self.updated_task_state, self.expected_task_state)
+ self.assertEqual(exc_str, filter_properties['retry']['exc'])
class ComputeReschedulingResizeTestCase(ComputeReschedulingTestCase):
@@ -5974,7 +5983,8 @@ class ComputeReschedulingResizeTestCase(ComputeReschedulingTestCase):
super(ComputeReschedulingResizeTestCase, self).setUp()
self.expected_task_state = task_states.RESIZE_PREP
- def _reschedule(self, request_spec=None, filter_properties=None):
+ def _reschedule(self, request_spec=None, filter_properties=None,
+ exc_info=None):
if not filter_properties:
filter_properties = {}
@@ -5991,7 +6001,7 @@ class ComputeReschedulingResizeTestCase(ComputeReschedulingTestCase):
return self.compute._reschedule(self.context, request_spec,
filter_properties, instance_uuid, scheduler_method,
- method_args, self.expected_task_state)
+ method_args, self.expected_task_state, exc_info=exc_info)
class InnerTestingException(Exception):
@@ -6111,7 +6121,8 @@ class ComputeRescheduleOrReraiseTestCase(BaseTestCase):
self.instance)
self.compute._reschedule(self.context, None, {}, instance_uuid,
self.compute.scheduler_rpcapi.run_instance,
- method_args, task_states.SCHEDULING).AndReturn(True)
+ method_args, task_states.SCHEDULING, exc_info).AndReturn(
+ True)
self.compute._log_original_error(exc_info, instance_uuid)
self.mox.ReplayAll()
@@ -6205,7 +6216,7 @@ class ComputeRescheduleResizeOrReraiseTestCase(BaseTestCase):
self.compute._reschedule(self.context, {}, {},
self.instance_uuid,
self.compute.scheduler_rpcapi.prep_resize, method_args,
- task_states.RESIZE_PREP).AndReturn(True)
+ task_states.RESIZE_PREP, exc_info).AndReturn(True)
self.compute._log_original_error(exc_info, self.instance_uuid)
self.mox.ReplayAll()