diff options
author | Rick Harris <rconradharris@gmail.com> | 2013-01-17 20:42:27 +0000 |
---|---|---|
committer | Rick Harris <rconradharris@gmail.com> | 2013-01-17 21:44:57 +0000 |
commit | d35fccc579812faa5c8d1338744d24e59c426dd6 (patch) | |
tree | 9f544f909d59a24d7567e46daff31dc9976e6572 /nova | |
parent | 52fe25a5281a50a5e4c1ff093ce9ed966241a3d7 (diff) | |
download | nova-d35fccc579812faa5c8d1338744d24e59c426dd6.tar.gz nova-d35fccc579812faa5c8d1338744d24e59c426dd6.tar.xz nova-d35fccc579812faa5c8d1338744d24e59c426dd6.zip |
Add host to instance_faults table.
Instances can be rescheduled or migrated to different hosts; so, to make
troubleshooting easier, it's convenient to store the host in which the
failure occurred with the instance fault record.
Change-Id: Ib2b9a5bab5d95739b730f0eab15884c2db33a782
Diffstat (limited to 'nova')
-rw-r--r-- | nova/compute/manager.py | 8 | ||||
-rw-r--r-- | nova/compute/utils.py | 5 | ||||
-rw-r--r-- | nova/db/sqlalchemy/migrate_repo/versions/150_add_host_to_instance_faults.py | 36 | ||||
-rw-r--r-- | nova/db/sqlalchemy/models.py | 1 | ||||
-rw-r--r-- | nova/scheduler/driver.py | 4 | ||||
-rw-r--r-- | nova/scheduler/manager.py | 4 | ||||
-rw-r--r-- | nova/tests/compute/test_compute.py | 46 | ||||
-rw-r--r-- | nova/tests/scheduler/test_chance_scheduler.py | 6 | ||||
-rw-r--r-- | nova/tests/scheduler/test_filter_scheduler.py | 16 | ||||
-rw-r--r-- | nova/tests/scheduler/test_scheduler.py | 24 |
10 files changed, 95 insertions, 55 deletions
diff --git a/nova/compute/manager.py b/nova/compute/manager.py index fa1746b92..384866cbe 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -230,7 +230,7 @@ def wrap_instance_fault(function): with excutils.save_and_reraise_exception(): compute_utils.add_instance_fault_from_exc(context, - kwargs['instance']['uuid'], e, sys.exc_info()) + kwargs['instance'], e, sys.exc_info()) return decorated_function @@ -730,7 +730,7 @@ class ComputeManager(manager.SchedulerDependentManager): instance_uuid = instance['uuid'] rescheduled = False - compute_utils.add_instance_fault_from_exc(context, instance_uuid, + compute_utils.add_instance_fault_from_exc(context, instance, exc_info[1], exc_info=exc_info) try: @@ -1464,7 +1464,7 @@ class ComputeManager(manager.SchedulerDependentManager): LOG.error(_('Cannot reboot instance: %(exc)s'), locals(), context=context, instance=instance) compute_utils.add_instance_fault_from_exc(context, - instance['uuid'], exc, sys.exc_info()) + instance, exc, sys.exc_info()) # Fall through and reset task_state to None current_power_state = self._get_power_state(context, instance) @@ -1995,7 +1995,7 @@ class ComputeManager(manager.SchedulerDependentManager): rescheduled = False instance_uuid = instance['uuid'] - compute_utils.add_instance_fault_from_exc(context, instance_uuid, + compute_utils.add_instance_fault_from_exc(context, instance, exc_info[0], exc_info=exc_info) try: diff --git a/nova/compute/utils.py b/nova/compute/utils.py index 0c475d082..f07346c6b 100644 --- a/nova/compute/utils.py +++ b/nova/compute/utils.py @@ -44,7 +44,7 @@ def metadata_to_dict(metadata): return result -def add_instance_fault_from_exc(context, instance_uuid, fault, exc_info=None): +def add_instance_fault_from_exc(context, instance, fault, exc_info=None): """Adds the specified fault to the database.""" code = 500 @@ -62,10 +62,11 @@ def add_instance_fault_from_exc(context, instance_uuid, fault, exc_info=None): details += '\n' + 
''.join(traceback.format_tb(tb)) values = { - 'instance_uuid': instance_uuid, + 'instance_uuid': instance['uuid'], 'code': code, 'message': unicode(message), 'details': unicode(details), + 'host': CONF.host } db.instance_fault_create(context, values) diff --git a/nova/db/sqlalchemy/migrate_repo/versions/150_add_host_to_instance_faults.py b/nova/db/sqlalchemy/migrate_repo/versions/150_add_host_to_instance_faults.py new file mode 100644 index 000000000..3fd87e1e1 --- /dev/null +++ b/nova/db/sqlalchemy/migrate_repo/versions/150_add_host_to_instance_faults.py @@ -0,0 +1,36 @@ +# vim: tabstop=4 shiftwidth=4 softtabstop=4 + +# Copyright 2013 OpenStack LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +from sqlalchemy import Column, Index, MetaData, String, Table + + +def upgrade(migrate_engine): + meta = MetaData() + meta.bind = migrate_engine + + instance_faults = Table('instance_faults', meta, autoload=True) + host = Column('host', String(length=255)) + instance_faults.create_column(host) + Index('instance_faults_host_idx', instance_faults.c.host).create( + migrate_engine) + + +def downgrade(migrate_engine): + meta = MetaData() + meta.bind = migrate_engine + + instance_faults = Table('instance_faults', meta, autoload=True) + instance_faults.drop_column('host') diff --git a/nova/db/sqlalchemy/models.py b/nova/db/sqlalchemy/models.py index 56a4d944a..5050cb77e 100644 --- a/nova/db/sqlalchemy/models.py +++ b/nova/db/sqlalchemy/models.py @@ -992,6 +992,7 @@ class InstanceFault(BASE, NovaBase): code = Column(Integer(), nullable=False) message = Column(String(255)) details = Column(Text) + host = Column(String(255)) class InstanceAction(BASE, NovaBase): diff --git a/nova/scheduler/driver.py b/nova/scheduler/driver.py index d1ae1cd6e..09de10388 100644 --- a/nova/scheduler/driver.py +++ b/nova/scheduler/driver.py @@ -56,8 +56,6 @@ CONF.register_opts(scheduler_driver_opts) def handle_schedule_error(context, ex, instance_uuid, request_spec): if not isinstance(ex, exception.NoValidHost): LOG.exception(_("Exception during scheduler.run_instance")) - compute_utils.add_instance_fault_from_exc(context, - instance_uuid, ex, sys.exc_info()) state = vm_states.ERROR.upper() LOG.warning(_('Setting instance to %(state)s state.'), locals(), instance_uuid=instance_uuid) @@ -68,6 +66,8 @@ def handle_schedule_error(context, ex, instance_uuid, request_spec): 'task_state': None}) notifications.send_update(context, old_ref, new_ref, service="scheduler") + compute_utils.add_instance_fault_from_exc(context, + new_ref, ex, sys.exc_info()) properties = request_spec.get('instance_properties', {}) payload = dict(request_spec=request_spec, diff --git a/nova/scheduler/manager.py 
b/nova/scheduler/manager.py index 84bdcddb5..23e64cd7c 100644 --- a/nova/scheduler/manager.py +++ b/nova/scheduler/manager.py @@ -180,8 +180,6 @@ class SchedulerManager(manager.Manager): uuids = [properties.get('uuid')] for instance_uuid in request_spec.get('instance_uuids') or uuids: if instance_uuid: - compute_utils.add_instance_fault_from_exc(context, - instance_uuid, ex, sys.exc_info()) state = vm_state.upper() LOG.warning(_('Setting instance to %(state)s state.'), locals(), instance_uuid=instance_uuid) @@ -191,6 +189,8 @@ class SchedulerManager(manager.Manager): context, instance_uuid, updates) notifications.send_update(context, old_ref, new_ref, service="scheduler") + compute_utils.add_instance_fault_from_exc(context, + new_ref, ex, sys.exc_info()) payload = dict(request_spec=request_spec, instance_properties=properties, diff --git a/nova/tests/compute/test_compute.py b/nova/tests/compute/test_compute.py index 3740d598e..691991f60 100644 --- a/nova/tests/compute/test_compute.py +++ b/nova/tests/compute/test_compute.py @@ -2698,8 +2698,8 @@ class ComputeTestCase(BaseTestCase): self.assertEqual(task_states.POWERING_OFF, instances[0]['task_state']) def test_add_instance_fault(self): + instance = self._create_fake_instance() exc_info = None - instance_uuid = str(uuid.uuid4()) def fake_db_fault_create(ctxt, values): self.assertTrue(values['details'].startswith('test')) @@ -2709,7 +2709,8 @@ class ComputeTestCase(BaseTestCase): expected = { 'code': 500, 'message': 'NotImplementedError', - 'instance_uuid': instance_uuid, + 'instance_uuid': instance['uuid'], + 'host': self.compute.host } self.assertEquals(expected, values) @@ -2721,13 +2722,12 @@ class ComputeTestCase(BaseTestCase): self.stubs.Set(nova.db, 'instance_fault_create', fake_db_fault_create) ctxt = context.get_admin_context() - compute_utils.add_instance_fault_from_exc(ctxt, instance_uuid, - NotImplementedError('test'), - exc_info) + compute_utils.add_instance_fault_from_exc(ctxt, instance, + 
NotImplementedError('test'), exc_info) def test_add_instance_fault_with_remote_error(self): + instance = self._create_fake_instance() exc_info = None - instance_uuid = str(uuid.uuid4()) def fake_db_fault_create(ctxt, values): self.assertTrue(values['details'].startswith('Remote error')) @@ -2737,8 +2737,9 @@ class ComputeTestCase(BaseTestCase): expected = { 'code': 500, - 'instance_uuid': instance_uuid, - 'message': 'My Test Message' + 'instance_uuid': instance['uuid'], + 'message': 'My Test Message', + 'host': self.compute.host } self.assertEquals(expected, values) @@ -2750,13 +2751,12 @@ class ComputeTestCase(BaseTestCase): self.stubs.Set(nova.db, 'instance_fault_create', fake_db_fault_create) ctxt = context.get_admin_context() - compute_utils.add_instance_fault_from_exc(ctxt, instance_uuid, - exc, - exc_info) + compute_utils.add_instance_fault_from_exc(ctxt, instance, exc, + exc_info) def test_add_instance_fault_user_error(self): + instance = self._create_fake_instance() exc_info = None - instance_uuid = str(uuid.uuid4()) def fake_db_fault_create(ctxt, values): @@ -2764,7 +2764,8 @@ class ComputeTestCase(BaseTestCase): 'code': 400, 'message': 'Invalid', 'details': 'fake details', - 'instance_uuid': instance_uuid, + 'instance_uuid': instance['uuid'], + 'host': self.compute.host } self.assertEquals(expected, values) @@ -2778,26 +2779,27 @@ class ComputeTestCase(BaseTestCase): self.stubs.Set(nova.db, 'instance_fault_create', fake_db_fault_create) ctxt = context.get_admin_context() - compute_utils.add_instance_fault_from_exc(ctxt, instance_uuid, - user_exc, exc_info) + compute_utils.add_instance_fault_from_exc(ctxt, instance, user_exc, + exc_info) def test_add_instance_fault_no_exc_info(self): - instance_uuid = str(uuid.uuid4()) + instance = self._create_fake_instance() def fake_db_fault_create(ctxt, values): expected = { 'code': 500, 'message': 'NotImplementedError', 'details': 'test', - 'instance_uuid': instance_uuid, + 'instance_uuid': instance['uuid'], + 'host': 
self.compute.host } self.assertEquals(expected, values) self.stubs.Set(nova.db, 'instance_fault_create', fake_db_fault_create) ctxt = context.get_admin_context() - compute_utils.add_instance_fault_from_exc(ctxt, instance_uuid, - NotImplementedError('test')) + compute_utils.add_instance_fault_from_exc(ctxt, instance, + NotImplementedError('test')) def test_cleanup_running_deleted_instances(self): admin_context = context.get_admin_context() @@ -6617,7 +6619,7 @@ class ComputeRescheduleOrReraiseTestCase(BaseTestCase): exc_info = sys.exc_info() compute_utils.add_instance_fault_from_exc(self.context, - instance_uuid, exc_info[0], exc_info=exc_info) + self.instance, exc_info[0], exc_info=exc_info) self.compute._deallocate_network(self.context, self.instance).AndRaise(InnerTestingException("Error")) self.compute._log_original_error(exc_info, instance_uuid) @@ -6667,7 +6669,7 @@ class ComputeRescheduleOrReraiseTestCase(BaseTestCase): except Exception: exc_info = sys.exc_info() compute_utils.add_instance_fault_from_exc(self.context, - instance_uuid, exc_info[0], exc_info=exc_info) + self.instance, exc_info[0], exc_info=exc_info) self.compute._deallocate_network(self.context, self.instance) self.compute._reschedule(self.context, None, {}, instance_uuid, @@ -6695,7 +6697,7 @@ class ComputeRescheduleOrReraiseTestCase(BaseTestCase): exc_info = sys.exc_info() compute_utils.add_instance_fault_from_exc(self.context, - instance_uuid, exc_info[0], exc_info=exc_info) + self.instance, exc_info[0], exc_info=exc_info) self.compute._deallocate_network(self.context, self.instance) self.compute._reschedule(self.context, None, {}, instance_uuid, diff --git a/nova/tests/scheduler/test_chance_scheduler.py b/nova/tests/scheduler/test_chance_scheduler.py index 26cde055b..76fba900d 100644 --- a/nova/tests/scheduler/test_chance_scheduler.py +++ b/nova/tests/scheduler/test_chance_scheduler.py @@ -130,11 +130,11 @@ class ChanceSchedulerTestCase(test_scheduler.SchedulerTestCase): # instance 1 
ctxt.elevated().AndReturn(ctxt_elevated) self.driver.hosts_up(ctxt_elevated, 'compute').AndReturn([]) - compute_utils.add_instance_fault_from_exc(ctxt, - uuid, mox.IsA(exception.NoValidHost), mox.IgnoreArg()) - db.instance_update_and_get_original(ctxt, uuid, + old_ref, new_ref = db.instance_update_and_get_original(ctxt, uuid, {'vm_state': vm_states.ERROR, 'task_state': None}).AndReturn(({}, {})) + compute_utils.add_instance_fault_from_exc(ctxt, + new_ref, mox.IsA(exception.NoValidHost), mox.IgnoreArg()) self.mox.ReplayAll() self.driver.schedule_run_instance( diff --git a/nova/tests/scheduler/test_filter_scheduler.py b/nova/tests/scheduler/test_filter_scheduler.py index 5d8e8236b..2bd2cb85b 100644 --- a/nova/tests/scheduler/test_filter_scheduler.py +++ b/nova/tests/scheduler/test_filter_scheduler.py @@ -58,11 +58,11 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase): self.mox.StubOutWithMock(compute_utils, 'add_instance_fault_from_exc') self.mox.StubOutWithMock(db, 'instance_update_and_get_original') + old_ref, new_ref = db.instance_update_and_get_original(fake_context, + uuid, {'vm_state': vm_states.ERROR, 'task_state': + None}).AndReturn(({}, {})) compute_utils.add_instance_fault_from_exc(fake_context, - uuid, mox.IsA(exception.NoValidHost), mox.IgnoreArg()) - db.instance_update_and_get_original(fake_context, uuid, - {'vm_state': vm_states.ERROR, - 'task_state': None}).AndReturn(({}, {})) + new_ref, mox.IsA(exception.NoValidHost), mox.IgnoreArg()) self.mox.ReplayAll() sched.schedule_run_instance( fake_context, request_spec, None, None, None, None, {}) @@ -88,11 +88,11 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase): 'instance_uuids': [uuid]} self.mox.StubOutWithMock(compute_utils, 'add_instance_fault_from_exc') self.mox.StubOutWithMock(db, 'instance_update_and_get_original') + old_ref, new_ref = db.instance_update_and_get_original(fake_context, + uuid, {'vm_state': vm_states.ERROR, 'task_state': + None}).AndReturn(({}, {})) 
compute_utils.add_instance_fault_from_exc(fake_context, - uuid, mox.IsA(exception.NoValidHost), mox.IgnoreArg()) - db.instance_update_and_get_original(fake_context, uuid, - {'vm_state': vm_states.ERROR, - 'task_state': None}).AndReturn(({}, {})) + new_ref, mox.IsA(exception.NoValidHost), mox.IgnoreArg()) self.mox.ReplayAll() sched.schedule_run_instance( fake_context, request_spec, None, None, None, None, {}) diff --git a/nova/tests/scheduler/test_scheduler.py b/nova/tests/scheduler/test_scheduler.py index dd5b0ae32..eb4c3864f 100644 --- a/nova/tests/scheduler/test_scheduler.py +++ b/nova/tests/scheduler/test_scheduler.py @@ -183,12 +183,12 @@ class SchedulerManagerTestCase(test.TestCase): self.manager.driver.schedule_run_instance(self.context, request_spec, None, None, None, None, {}).AndRaise( exception.NoValidHost(reason="")) - db.instance_update_and_get_original(self.context, fake_instance_uuid, + old, new_ref = db.instance_update_and_get_original(self.context, + fake_instance_uuid, {"vm_state": vm_states.ERROR, "task_state": None}).AndReturn((inst, inst)) - compute_utils.add_instance_fault_from_exc(self.context, - fake_instance_uuid, mox.IsA(exception.NoValidHost), - mox.IgnoreArg()) + compute_utils.add_instance_fault_from_exc(self.context, new_ref, + mox.IsA(exception.NoValidHost), mox.IgnoreArg()) self.mox.ReplayAll() self.manager.run_instance(self.context, request_spec, @@ -217,12 +217,12 @@ class SchedulerManagerTestCase(test.TestCase): } self.manager.driver.schedule_prep_resize(**kwargs).AndRaise( exception.NoValidHost(reason="")) - db.instance_update_and_get_original(self.context, fake_instance_uuid, + old_ref, new_ref = db.instance_update_and_get_original(self.context, + fake_instance_uuid, {"vm_state": vm_states.ACTIVE, "task_state": None}).AndReturn( (inst, inst)) - compute_utils.add_instance_fault_from_exc(self.context, - fake_instance_uuid, mox.IsA(exception.NoValidHost), - mox.IgnoreArg()) + compute_utils.add_instance_fault_from_exc(self.context, 
new_ref, + mox.IsA(exception.NoValidHost), mox.IgnoreArg()) self.mox.ReplayAll() self.manager.prep_resize(**kwargs) @@ -254,12 +254,12 @@ class SchedulerManagerTestCase(test.TestCase): "vm_state": "", "task_state": "", } - db.instance_update_and_get_original(self.context, fake_instance_uuid, + old_ref, new_ref = db.instance_update_and_get_original(self.context, + fake_instance_uuid, {"vm_state": vm_states.ERROR, "task_state": None}).AndReturn((inst, inst)) - compute_utils.add_instance_fault_from_exc(self.context, - fake_instance_uuid, mox.IsA(test.TestingException), - mox.IgnoreArg()) + compute_utils.add_instance_fault_from_exc(self.context, new_ref, + mox.IsA(test.TestingException), mox.IgnoreArg()) self.mox.ReplayAll() |