From 0cba85cb267994018c8a0d5e40b2ed0b5a7837df Mon Sep 17 00:00:00 2001 From: Vishvananda Ishaya Date: Mon, 17 Sep 2012 16:09:41 -0700 Subject: Improve error handling of scheduler Modifies scheduler errors to report instance faults and to set instance_state back to None on failure. Related to bug 1051066 Change-Id: Id9f36a75370849db7baf3fe24ce96c6f4284255d --- nova/scheduler/manager.py | 69 ++++++++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 31 deletions(-) (limited to 'nova/scheduler/manager.py') diff --git a/nova/scheduler/manager.py b/nova/scheduler/manager.py index dca8c0cea..7242bec45 100644 --- a/nova/scheduler/manager.py +++ b/nova/scheduler/manager.py @@ -22,7 +22,9 @@ Scheduler Service """ import functools +import sys +from nova.compute import utils as compute_utils from nova.compute import vm_states from nova import db from nova import exception @@ -104,12 +106,14 @@ class SchedulerManager(manager.Manager): except exception.NoValidHost as ex: # don't re-raise self._set_vm_state_and_notify('run_instance', - {'vm_state': vm_states.ERROR}, + {'vm_state': vm_states.ERROR, + 'task_state': None}, context, ex, request_spec) except Exception as ex: with excutils.save_and_reraise_exception(): self._set_vm_state_and_notify('run_instance', - {'vm_state': vm_states.ERROR}, + {'vm_state': vm_states.ERROR, + 'task_state': None}, context, ex, request_spec) def prep_resize(self, context, image, request_spec, filter_properties, @@ -139,7 +143,8 @@ class SchedulerManager(manager.Manager): except Exception as ex: with excutils.save_and_reraise_exception(): self._set_vm_state_and_notify('prep_resize', - {'vm_state': vm_states.ERROR}, + {'vm_state': vm_states.ERROR, + 'task_state': None}, context, ex, request_spec) if reservations: QUOTAS.rollback(context, reservations) @@ -162,34 +167,36 @@ class SchedulerManager(manager.Manager): vm_state = updates['vm_state'] properties = request_spec.get('instance_properties', {}) - # FIXME(comstud): We really need to move error handling closer - # to where the errors occur so we can deal with errors on - # individual instances when scheduling multiple. - if 'instance_uuids' in request_spec: - instance_uuid = request_spec['instance_uuids'][0] - else: - instance_uuid = properties.get('uuid', {}) - - if instance_uuid: - state = vm_state.upper() - LOG.warning(_('Setting instance to %(state)s state.'), locals(), - instance_uuid=instance_uuid) - - # update instance state and notify on the transition - (old_ref, new_ref) = db.instance_update_and_get_original(context, - instance_uuid, updates) - notifications.send_update(context, old_ref, new_ref, - service="scheduler") - - payload = dict(request_spec=request_spec, - instance_properties=properties, - instance_id=instance_uuid, - state=vm_state, - method=method, - reason=ex) - - notifier.notify(context, notifier.publisher_id("scheduler"), - 'scheduler.' + method, notifier.ERROR, payload) + # NOTE(vish): We shouldn't get here unless we have a catastrophic + # failure, so just set all instances to error. if uuid + # is not set, instance_uuids will be set to [None], this + # is solely to preserve existing behavior and can + # be removed along with the 'if instance_uuid:' if we can + # verify that uuid is always set. + uuids = [properties.get('uuid')] + for instance_uuid in request_spec.get('instance_uuids') or uuids: + if instance_uuid: + compute_utils.add_instance_fault_from_exc(context, + instance_uuid, ex, sys.exc_info()) + state = vm_state.upper() + LOG.warning(_('Setting instance to %(state)s state.'), + locals(), instance_uuid=instance_uuid) + + # update instance state and notify on the transition + (old_ref, new_ref) = db.instance_update_and_get_original( + context, instance_uuid, updates) + notifications.send_update(context, old_ref, new_ref, + service="scheduler") + + payload = dict(request_spec=request_spec, + instance_properties=properties, + instance_id=instance_uuid, + state=vm_state, + method=method, + reason=ex) + + notifier.notify(context, notifier.publisher_id("scheduler"), + 'scheduler.' + method, notifier.ERROR, payload) # NOTE (masumotok) : This method should be moved to nova.api.ec2.admin. # Based on bexar design summit discussion, -- cgit