summary refs log tree commit diff stats
path: root/nova
diff options
context:
space:
mode:
author Rick Harris <rconradharris@gmail.com> 2013-01-17 20:42:27 +0000
committer Rick Harris <rconradharris@gmail.com> 2013-01-17 21:44:57 +0000
commit d35fccc579812faa5c8d1338744d24e59c426dd6 (patch)
tree 9f544f909d59a24d7567e46daff31dc9976e6572 /nova
parent 52fe25a5281a50a5e4c1ff093ce9ed966241a3d7 (diff)
download nova-d35fccc579812faa5c8d1338744d24e59c426dd6.tar.gz
nova-d35fccc579812faa5c8d1338744d24e59c426dd6.tar.xz
nova-d35fccc579812faa5c8d1338744d24e59c426dd6.zip
Add host to instance_faults table.
Instances can be rescheduled or migrated to different hosts; so, to make troubleshooting easier, it's convenient to store the host on which the failure occurred with the instance fault record. Change-Id: Ib2b9a5bab5d95739b730f0eab15884c2db33a782
Diffstat (limited to 'nova')
-rw-r--r-- nova/compute/manager.py 8
-rw-r--r-- nova/compute/utils.py 5
-rw-r--r-- nova/db/sqlalchemy/migrate_repo/versions/150_add_host_to_instance_faults.py 36
-rw-r--r-- nova/db/sqlalchemy/models.py 1
-rw-r--r-- nova/scheduler/driver.py 4
-rw-r--r-- nova/scheduler/manager.py 4
-rw-r--r-- nova/tests/compute/test_compute.py 46
-rw-r--r-- nova/tests/scheduler/test_chance_scheduler.py 6
-rw-r--r-- nova/tests/scheduler/test_filter_scheduler.py 16
-rw-r--r-- nova/tests/scheduler/test_scheduler.py 24
10 files changed, 95 insertions, 55 deletions
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index fa1746b92..384866cbe 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -230,7 +230,7 @@ def wrap_instance_fault(function):
with excutils.save_and_reraise_exception():
compute_utils.add_instance_fault_from_exc(context,
- kwargs['instance']['uuid'], e, sys.exc_info())
+ kwargs['instance'], e, sys.exc_info())
return decorated_function
@@ -730,7 +730,7 @@ class ComputeManager(manager.SchedulerDependentManager):
instance_uuid = instance['uuid']
rescheduled = False
- compute_utils.add_instance_fault_from_exc(context, instance_uuid,
+ compute_utils.add_instance_fault_from_exc(context, instance,
exc_info[1], exc_info=exc_info)
try:
@@ -1464,7 +1464,7 @@ class ComputeManager(manager.SchedulerDependentManager):
LOG.error(_('Cannot reboot instance: %(exc)s'), locals(),
context=context, instance=instance)
compute_utils.add_instance_fault_from_exc(context,
- instance['uuid'], exc, sys.exc_info())
+ instance, exc, sys.exc_info())
# Fall through and reset task_state to None
current_power_state = self._get_power_state(context, instance)
@@ -1995,7 +1995,7 @@ class ComputeManager(manager.SchedulerDependentManager):
rescheduled = False
instance_uuid = instance['uuid']
- compute_utils.add_instance_fault_from_exc(context, instance_uuid,
+ compute_utils.add_instance_fault_from_exc(context, instance,
exc_info[0], exc_info=exc_info)
try:
diff --git a/nova/compute/utils.py b/nova/compute/utils.py
index 0c475d082..f07346c6b 100644
--- a/nova/compute/utils.py
+++ b/nova/compute/utils.py
@@ -44,7 +44,7 @@ def metadata_to_dict(metadata):
return result
-def add_instance_fault_from_exc(context, instance_uuid, fault, exc_info=None):
+def add_instance_fault_from_exc(context, instance, fault, exc_info=None):
"""Adds the specified fault to the database."""
code = 500
@@ -62,10 +62,11 @@ def add_instance_fault_from_exc(context, instance_uuid, fault, exc_info=None):
details += '\n' + ''.join(traceback.format_tb(tb))
values = {
- 'instance_uuid': instance_uuid,
+ 'instance_uuid': instance['uuid'],
'code': code,
'message': unicode(message),
'details': unicode(details),
+ 'host': CONF.host
}
db.instance_fault_create(context, values)
diff --git a/nova/db/sqlalchemy/migrate_repo/versions/150_add_host_to_instance_faults.py b/nova/db/sqlalchemy/migrate_repo/versions/150_add_host_to_instance_faults.py
new file mode 100644
index 000000000..3fd87e1e1
--- /dev/null
+++ b/nova/db/sqlalchemy/migrate_repo/versions/150_add_host_to_instance_faults.py
@@ -0,0 +1,36 @@
+# vim: tabstop=4 shiftwidth=4 softtabstop=4
+
+# Copyright 2013 OpenStack LLC.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from sqlalchemy import Column, Index, MetaData, String, Table
+
+
+def upgrade(migrate_engine):
+ meta = MetaData()
+ meta.bind = migrate_engine
+
+ instance_faults = Table('instance_faults', meta, autoload=True)
+ host = Column('host', String(length=255))
+ instance_faults.create_column(host)
+ Index('instance_faults_host_idx', instance_faults.c.host).create(
+ migrate_engine)
+
+
+def downgrade(migrate_engine):
+ meta = MetaData()
+ meta.bind = migrate_engine
+
+ instance_faults = Table('instance_faults', meta, autoload=True)
+ instance_faults.drop_column('host')
diff --git a/nova/db/sqlalchemy/models.py b/nova/db/sqlalchemy/models.py
index 56a4d944a..5050cb77e 100644
--- a/nova/db/sqlalchemy/models.py
+++ b/nova/db/sqlalchemy/models.py
@@ -992,6 +992,7 @@ class InstanceFault(BASE, NovaBase):
code = Column(Integer(), nullable=False)
message = Column(String(255))
details = Column(Text)
+ host = Column(String(255))
class InstanceAction(BASE, NovaBase):
diff --git a/nova/scheduler/driver.py b/nova/scheduler/driver.py
index d1ae1cd6e..09de10388 100644
--- a/nova/scheduler/driver.py
+++ b/nova/scheduler/driver.py
@@ -56,8 +56,6 @@ CONF.register_opts(scheduler_driver_opts)
def handle_schedule_error(context, ex, instance_uuid, request_spec):
if not isinstance(ex, exception.NoValidHost):
LOG.exception(_("Exception during scheduler.run_instance"))
- compute_utils.add_instance_fault_from_exc(context,
- instance_uuid, ex, sys.exc_info())
state = vm_states.ERROR.upper()
LOG.warning(_('Setting instance to %(state)s state.'),
locals(), instance_uuid=instance_uuid)
@@ -68,6 +66,8 @@ def handle_schedule_error(context, ex, instance_uuid, request_spec):
'task_state': None})
notifications.send_update(context, old_ref, new_ref,
service="scheduler")
+ compute_utils.add_instance_fault_from_exc(context,
+ new_ref, ex, sys.exc_info())
properties = request_spec.get('instance_properties', {})
payload = dict(request_spec=request_spec,
diff --git a/nova/scheduler/manager.py b/nova/scheduler/manager.py
index 84bdcddb5..23e64cd7c 100644
--- a/nova/scheduler/manager.py
+++ b/nova/scheduler/manager.py
@@ -180,8 +180,6 @@ class SchedulerManager(manager.Manager):
uuids = [properties.get('uuid')]
for instance_uuid in request_spec.get('instance_uuids') or uuids:
if instance_uuid:
- compute_utils.add_instance_fault_from_exc(context,
- instance_uuid, ex, sys.exc_info())
state = vm_state.upper()
LOG.warning(_('Setting instance to %(state)s state.'),
locals(), instance_uuid=instance_uuid)
@@ -191,6 +189,8 @@ class SchedulerManager(manager.Manager):
context, instance_uuid, updates)
notifications.send_update(context, old_ref, new_ref,
service="scheduler")
+ compute_utils.add_instance_fault_from_exc(context,
+ new_ref, ex, sys.exc_info())
payload = dict(request_spec=request_spec,
instance_properties=properties,
diff --git a/nova/tests/compute/test_compute.py b/nova/tests/compute/test_compute.py
index 3740d598e..691991f60 100644
--- a/nova/tests/compute/test_compute.py
+++ b/nova/tests/compute/test_compute.py
@@ -2698,8 +2698,8 @@ class ComputeTestCase(BaseTestCase):
self.assertEqual(task_states.POWERING_OFF, instances[0]['task_state'])
def test_add_instance_fault(self):
+ instance = self._create_fake_instance()
exc_info = None
- instance_uuid = str(uuid.uuid4())
def fake_db_fault_create(ctxt, values):
self.assertTrue(values['details'].startswith('test'))
@@ -2709,7 +2709,8 @@ class ComputeTestCase(BaseTestCase):
expected = {
'code': 500,
'message': 'NotImplementedError',
- 'instance_uuid': instance_uuid,
+ 'instance_uuid': instance['uuid'],
+ 'host': self.compute.host
}
self.assertEquals(expected, values)
@@ -2721,13 +2722,12 @@ class ComputeTestCase(BaseTestCase):
self.stubs.Set(nova.db, 'instance_fault_create', fake_db_fault_create)
ctxt = context.get_admin_context()
- compute_utils.add_instance_fault_from_exc(ctxt, instance_uuid,
- NotImplementedError('test'),
- exc_info)
+ compute_utils.add_instance_fault_from_exc(ctxt, instance,
+ NotImplementedError('test'), exc_info)
def test_add_instance_fault_with_remote_error(self):
+ instance = self._create_fake_instance()
exc_info = None
- instance_uuid = str(uuid.uuid4())
def fake_db_fault_create(ctxt, values):
self.assertTrue(values['details'].startswith('Remote error'))
@@ -2737,8 +2737,9 @@ class ComputeTestCase(BaseTestCase):
expected = {
'code': 500,
- 'instance_uuid': instance_uuid,
- 'message': 'My Test Message'
+ 'instance_uuid': instance['uuid'],
+ 'message': 'My Test Message',
+ 'host': self.compute.host
}
self.assertEquals(expected, values)
@@ -2750,13 +2751,12 @@ class ComputeTestCase(BaseTestCase):
self.stubs.Set(nova.db, 'instance_fault_create', fake_db_fault_create)
ctxt = context.get_admin_context()
- compute_utils.add_instance_fault_from_exc(ctxt, instance_uuid,
- exc,
- exc_info)
+ compute_utils.add_instance_fault_from_exc(ctxt, instance, exc,
+ exc_info)
def test_add_instance_fault_user_error(self):
+ instance = self._create_fake_instance()
exc_info = None
- instance_uuid = str(uuid.uuid4())
def fake_db_fault_create(ctxt, values):
@@ -2764,7 +2764,8 @@ class ComputeTestCase(BaseTestCase):
'code': 400,
'message': 'Invalid',
'details': 'fake details',
- 'instance_uuid': instance_uuid,
+ 'instance_uuid': instance['uuid'],
+ 'host': self.compute.host
}
self.assertEquals(expected, values)
@@ -2778,26 +2779,27 @@ class ComputeTestCase(BaseTestCase):
self.stubs.Set(nova.db, 'instance_fault_create', fake_db_fault_create)
ctxt = context.get_admin_context()
- compute_utils.add_instance_fault_from_exc(ctxt, instance_uuid,
- user_exc, exc_info)
+ compute_utils.add_instance_fault_from_exc(ctxt, instance, user_exc,
+ exc_info)
def test_add_instance_fault_no_exc_info(self):
- instance_uuid = str(uuid.uuid4())
+ instance = self._create_fake_instance()
def fake_db_fault_create(ctxt, values):
expected = {
'code': 500,
'message': 'NotImplementedError',
'details': 'test',
- 'instance_uuid': instance_uuid,
+ 'instance_uuid': instance['uuid'],
+ 'host': self.compute.host
}
self.assertEquals(expected, values)
self.stubs.Set(nova.db, 'instance_fault_create', fake_db_fault_create)
ctxt = context.get_admin_context()
- compute_utils.add_instance_fault_from_exc(ctxt, instance_uuid,
- NotImplementedError('test'))
+ compute_utils.add_instance_fault_from_exc(ctxt, instance,
+ NotImplementedError('test'))
def test_cleanup_running_deleted_instances(self):
admin_context = context.get_admin_context()
@@ -6617,7 +6619,7 @@ class ComputeRescheduleOrReraiseTestCase(BaseTestCase):
exc_info = sys.exc_info()
compute_utils.add_instance_fault_from_exc(self.context,
- instance_uuid, exc_info[0], exc_info=exc_info)
+ self.instance, exc_info[0], exc_info=exc_info)
self.compute._deallocate_network(self.context,
self.instance).AndRaise(InnerTestingException("Error"))
self.compute._log_original_error(exc_info, instance_uuid)
@@ -6667,7 +6669,7 @@ class ComputeRescheduleOrReraiseTestCase(BaseTestCase):
except Exception:
exc_info = sys.exc_info()
compute_utils.add_instance_fault_from_exc(self.context,
- instance_uuid, exc_info[0], exc_info=exc_info)
+ self.instance, exc_info[0], exc_info=exc_info)
self.compute._deallocate_network(self.context,
self.instance)
self.compute._reschedule(self.context, None, {}, instance_uuid,
@@ -6695,7 +6697,7 @@ class ComputeRescheduleOrReraiseTestCase(BaseTestCase):
exc_info = sys.exc_info()
compute_utils.add_instance_fault_from_exc(self.context,
- instance_uuid, exc_info[0], exc_info=exc_info)
+ self.instance, exc_info[0], exc_info=exc_info)
self.compute._deallocate_network(self.context,
self.instance)
self.compute._reschedule(self.context, None, {}, instance_uuid,
diff --git a/nova/tests/scheduler/test_chance_scheduler.py b/nova/tests/scheduler/test_chance_scheduler.py
index 26cde055b..76fba900d 100644
--- a/nova/tests/scheduler/test_chance_scheduler.py
+++ b/nova/tests/scheduler/test_chance_scheduler.py
@@ -130,11 +130,11 @@ class ChanceSchedulerTestCase(test_scheduler.SchedulerTestCase):
# instance 1
ctxt.elevated().AndReturn(ctxt_elevated)
self.driver.hosts_up(ctxt_elevated, 'compute').AndReturn([])
- compute_utils.add_instance_fault_from_exc(ctxt,
- uuid, mox.IsA(exception.NoValidHost), mox.IgnoreArg())
- db.instance_update_and_get_original(ctxt, uuid,
+ old_ref, new_ref = db.instance_update_and_get_original(ctxt, uuid,
{'vm_state': vm_states.ERROR,
'task_state': None}).AndReturn(({}, {}))
+ compute_utils.add_instance_fault_from_exc(ctxt,
+ new_ref, mox.IsA(exception.NoValidHost), mox.IgnoreArg())
self.mox.ReplayAll()
self.driver.schedule_run_instance(
diff --git a/nova/tests/scheduler/test_filter_scheduler.py b/nova/tests/scheduler/test_filter_scheduler.py
index 5d8e8236b..2bd2cb85b 100644
--- a/nova/tests/scheduler/test_filter_scheduler.py
+++ b/nova/tests/scheduler/test_filter_scheduler.py
@@ -58,11 +58,11 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
self.mox.StubOutWithMock(compute_utils, 'add_instance_fault_from_exc')
self.mox.StubOutWithMock(db, 'instance_update_and_get_original')
+ old_ref, new_ref = db.instance_update_and_get_original(fake_context,
+ uuid, {'vm_state': vm_states.ERROR, 'task_state':
+ None}).AndReturn(({}, {}))
compute_utils.add_instance_fault_from_exc(fake_context,
- uuid, mox.IsA(exception.NoValidHost), mox.IgnoreArg())
- db.instance_update_and_get_original(fake_context, uuid,
- {'vm_state': vm_states.ERROR,
- 'task_state': None}).AndReturn(({}, {}))
+ new_ref, mox.IsA(exception.NoValidHost), mox.IgnoreArg())
self.mox.ReplayAll()
sched.schedule_run_instance(
fake_context, request_spec, None, None, None, None, {})
@@ -88,11 +88,11 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
'instance_uuids': [uuid]}
self.mox.StubOutWithMock(compute_utils, 'add_instance_fault_from_exc')
self.mox.StubOutWithMock(db, 'instance_update_and_get_original')
+ old_ref, new_ref = db.instance_update_and_get_original(fake_context,
+ uuid, {'vm_state': vm_states.ERROR, 'task_state':
+ None}).AndReturn(({}, {}))
compute_utils.add_instance_fault_from_exc(fake_context,
- uuid, mox.IsA(exception.NoValidHost), mox.IgnoreArg())
- db.instance_update_and_get_original(fake_context, uuid,
- {'vm_state': vm_states.ERROR,
- 'task_state': None}).AndReturn(({}, {}))
+ new_ref, mox.IsA(exception.NoValidHost), mox.IgnoreArg())
self.mox.ReplayAll()
sched.schedule_run_instance(
fake_context, request_spec, None, None, None, None, {})
diff --git a/nova/tests/scheduler/test_scheduler.py b/nova/tests/scheduler/test_scheduler.py
index dd5b0ae32..eb4c3864f 100644
--- a/nova/tests/scheduler/test_scheduler.py
+++ b/nova/tests/scheduler/test_scheduler.py
@@ -183,12 +183,12 @@ class SchedulerManagerTestCase(test.TestCase):
self.manager.driver.schedule_run_instance(self.context,
request_spec, None, None, None, None, {}).AndRaise(
exception.NoValidHost(reason=""))
- db.instance_update_and_get_original(self.context, fake_instance_uuid,
+ old, new_ref = db.instance_update_and_get_original(self.context,
+ fake_instance_uuid,
{"vm_state": vm_states.ERROR,
"task_state": None}).AndReturn((inst, inst))
- compute_utils.add_instance_fault_from_exc(self.context,
- fake_instance_uuid, mox.IsA(exception.NoValidHost),
- mox.IgnoreArg())
+ compute_utils.add_instance_fault_from_exc(self.context, new_ref,
+ mox.IsA(exception.NoValidHost), mox.IgnoreArg())
self.mox.ReplayAll()
self.manager.run_instance(self.context, request_spec,
@@ -217,12 +217,12 @@ class SchedulerManagerTestCase(test.TestCase):
}
self.manager.driver.schedule_prep_resize(**kwargs).AndRaise(
exception.NoValidHost(reason=""))
- db.instance_update_and_get_original(self.context, fake_instance_uuid,
+ old_ref, new_ref = db.instance_update_and_get_original(self.context,
+ fake_instance_uuid,
{"vm_state": vm_states.ACTIVE, "task_state": None}).AndReturn(
(inst, inst))
- compute_utils.add_instance_fault_from_exc(self.context,
- fake_instance_uuid, mox.IsA(exception.NoValidHost),
- mox.IgnoreArg())
+ compute_utils.add_instance_fault_from_exc(self.context, new_ref,
+ mox.IsA(exception.NoValidHost), mox.IgnoreArg())
self.mox.ReplayAll()
self.manager.prep_resize(**kwargs)
@@ -254,12 +254,12 @@ class SchedulerManagerTestCase(test.TestCase):
"vm_state": "",
"task_state": "",
}
- db.instance_update_and_get_original(self.context, fake_instance_uuid,
+ old_ref, new_ref = db.instance_update_and_get_original(self.context,
+ fake_instance_uuid,
{"vm_state": vm_states.ERROR,
"task_state": None}).AndReturn((inst, inst))
- compute_utils.add_instance_fault_from_exc(self.context,
- fake_instance_uuid, mox.IsA(test.TestingException),
- mox.IgnoreArg())
+ compute_utils.add_instance_fault_from_exc(self.context, new_ref,
+ mox.IsA(test.TestingException), mox.IgnoreArg())
self.mox.ReplayAll()