author     David Subiros <david.perez5@hp.com>    2011-11-16 17:31:29 +0000
committer  Vishvananda Ishaya <vishvananda@gmail.com>    2011-12-12 17:27:03 -0800
commit     ff753cd608973f5d72a80aef0f9fb8a646fccc3f (patch)
tree       0b51b6b1263d29fef254fb1e61863fcc55b56c10
parent     d3b75b75aa937380f04b5320b70c8673821af203 (diff)
VM state management and error states

This implements the blueprint nova-vm-state-management. It adds the following functionality:
- Filters compute API calls according to the state of the VM (defined in compute/state_checker).
- Sets the error state if the scheduler cannot allocate the VM on any host.
- Handles the create/delete concurrency in the compute manager.

Change-Id: Ie6d016b7d4781f70bb5967f204fa88a6412bd727
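For context, a minimal sketch (illustration only, not part of the patch) of the pattern this change applies throughout compute.api.API: the state-check decorator sits above the existing scheduler reroute decorator, so the check runs before the call is rerouted or cast to a compute host. `compute_api` and `ctxt` below are placeholder names.

    # Each state-sensitive API method is wrapped like this:
    @check_vm_state(state_checker.REBOOT)
    @scheduler_api.reroute_compute(state_checker.REBOOT)
    def reboot(self, context, instance, reboot_type):
        ...

    # Callers now get an explicit error instead of a silent no-op:
    try:
        compute_api.reboot(ctxt, instance, 'SOFT')
    except exception.InstanceInvalidState:
        pass  # e.g. the instance is still spawning, so the reboot is refused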
-rw-r--r--  nova/compute/api.py                           118
-rw-r--r--  nova/compute/manager.py                        35
-rw-r--r--  nova/compute/state_checker.py                 137
-rw-r--r--  nova/compute/task_states.py                    11
-rw-r--r--  nova/compute/vm_states.py                       6
-rw-r--r--  nova/exception.py                               5
-rw-r--r--  nova/scheduler/manager.py                      30
-rw-r--r--  nova/tests/api/ec2/test_cloud.py               21
-rw-r--r--  nova/tests/api/openstack/v2/test_servers.py     5
-rw-r--r--  nova/tests/scheduler/test_scheduler.py         57
-rw-r--r--  nova/tests/test_compute.py                    208
11 files changed, 533 insertions, 100 deletions
diff --git a/nova/compute/api.py b/nova/compute/api.py
index f65b5cead..89c4399f1 100644
--- a/nova/compute/api.py
+++ b/nova/compute/api.py
@@ -35,6 +35,7 @@ from nova import utils
from nova import volume
from nova.compute import instance_types
from nova.compute import power_state
+from nova.compute import state_checker
from nova.compute import task_states
from nova.compute import vm_states
from nova.scheduler import api as scheduler_api
@@ -49,25 +50,8 @@ flags.DECLARE('enable_zone_routing', 'nova.scheduler.api')
flags.DECLARE('vncproxy_topic', 'nova.vnc')
flags.DEFINE_integer('find_host_timeout', 30,
'Timeout after NN seconds when looking for a host.')
-
-
-def _is_able_to_shutdown(instance):
- vm_state = instance["vm_state"]
- instance_uuid = instance["uuid"]
-
- valid_shutdown_states = [
- vm_states.ACTIVE,
- vm_states.REBUILDING,
- vm_states.BUILDING,
- vm_states.ERROR,
- ]
-
- if vm_state not in valid_shutdown_states:
- LOG.warn(_("Instance %(instance_uuid)s cannot be shutdown from "
- "its current state: %(vm_state)s.") % locals())
- return False
-
- return True
+flags.DEFINE_boolean('api_check_vm_states', True,
+ 'Filter calls by vm state')
def _is_queued_delete(instance):
@@ -83,6 +67,27 @@ def _is_queued_delete(instance):
return True
+class check_vm_state(object):
+ """Class to wrap API functions that are sensitive to the VM state.
+
+ If the instance is in the wrong state, the wrapper will raise an exception.
+ It uses state_checker to decide if the call is allowed or not.
+ """
+
+ def __init__(self, method_name):
+ self.method_name = method_name
+
+ def __call__(self, f):
+ def _state_checker_wrap(api, context, instance, *args, **kw):
+ if FLAGS.api_check_vm_states and \
+ state_checker.is_blocked(self.method_name, context, instance):
+                raise exception.InstanceInvalidState(
+                    instance_uuid=instance['uuid'],
+                    state=instance['vm_state'], method=self.method_name)
+ else:
+ return f(api, context, instance, *args, **kw)
+ return _state_checker_wrap
+
+
class API(base.Base):
"""API for interacting with the compute manager."""
@@ -766,15 +771,13 @@ class API(base.Base):
rv = self.db.instance_update(context, instance["id"], kwargs)
return dict(rv.iteritems())
- @scheduler_api.reroute_compute("soft_delete")
+ @check_vm_state(state_checker.SOFT_DELETE)
+ @scheduler_api.reroute_compute(state_checker.SOFT_DELETE)
def soft_delete(self, context, instance):
"""Terminate an instance."""
instance_uuid = instance["uuid"]
LOG.debug(_("Going to try to soft delete %s"), instance_uuid)
- if not _is_able_to_shutdown(instance):
- return
-
# NOTE(jerdfelt): The compute daemon handles reclaiming instances
# that are in soft delete. If there is no host assigned, there is
# no daemon to reclaim, so delete it immediately.
@@ -806,20 +809,18 @@ class API(base.Base):
else:
self.db.instance_destroy(context, instance['id'])
- @scheduler_api.reroute_compute("delete")
+ @check_vm_state(state_checker.DELETE)
+ @scheduler_api.reroute_compute(state_checker.DELETE)
def delete(self, context, instance):
"""Terminate an instance."""
LOG.debug(_("Going to try to terminate %s"), instance["uuid"])
- if not _is_able_to_shutdown(instance):
- return
-
self._delete(context, instance)
- @scheduler_api.reroute_compute("restore")
+ @check_vm_state(state_checker.RESTORE)
+ @scheduler_api.reroute_compute(state_checker.RESTORE)
def restore(self, context, instance):
"""Restore a previously deleted (but not reclaimed) instance."""
-
if not _is_queued_delete(instance):
return
@@ -837,7 +838,8 @@ class API(base.Base):
self._cast_compute_message('power_on_instance', context,
instance['uuid'], host)
- @scheduler_api.reroute_compute("force_delete")
+ @check_vm_state(state_checker.FORCE_DELETE)
+ @scheduler_api.reroute_compute(state_checker.FORCE_DELETE)
def force_delete(self, context, instance):
"""Force delete a previously deleted (but not reclaimed) instance."""
@@ -846,15 +848,13 @@ class API(base.Base):
self._delete(context, instance)
- @scheduler_api.reroute_compute("stop")
+ @check_vm_state(state_checker.STOP)
+ @scheduler_api.reroute_compute(state_checker.STOP)
def stop(self, context, instance):
"""Stop an instance."""
instance_uuid = instance["uuid"]
LOG.debug(_("Going to try to stop %s"), instance_uuid)
- if not _is_able_to_shutdown(instance):
- return
-
self.update(context,
instance,
vm_state=vm_states.ACTIVE,
@@ -867,6 +867,7 @@ class API(base.Base):
self._cast_compute_message('stop_instance', context,
instance_uuid, host)
+ @check_vm_state(state_checker.START)
def start(self, context, instance):
"""Start an instance."""
vm_state = instance["vm_state"]
@@ -1078,7 +1079,8 @@ class API(base.Base):
raise exception.Error(_("Unable to find host for Instance %s")
% instance_uuid)
- @scheduler_api.reroute_compute("backup")
+ @check_vm_state(state_checker.BACKUP)
+ @scheduler_api.reroute_compute(state_checker.BACKUP)
def backup(self, context, instance, name, backup_type, rotation,
extra_properties=None):
"""Backup the given instance
@@ -1095,7 +1097,8 @@ class API(base.Base):
extra_properties=extra_properties)
return recv_meta
- @scheduler_api.reroute_compute("snapshot")
+ @check_vm_state(state_checker.SNAPSHOT)
+ @scheduler_api.reroute_compute(state_checker.SNAPSHOT)
def snapshot(self, context, instance, name, extra_properties=None):
"""Snapshot the given instance.
@@ -1125,12 +1128,6 @@ class API(base.Base):
task_state = instance["task_state"]
instance_uuid = instance['uuid']
- if task_state == task_states.IMAGE_BACKUP:
- raise exception.InstanceBackingUp(instance_uuid=instance_uuid)
-
- if task_state == task_states.IMAGE_SNAPSHOT:
- raise exception.InstanceSnapshotting(instance_uuid=instance_uuid)
-
properties = {
'instance_uuid': instance_uuid,
'user_id': str(context.user_id),
@@ -1150,7 +1147,8 @@ class API(base.Base):
params=params)
return recv_meta
- @scheduler_api.reroute_compute("reboot")
+ @check_vm_state(state_checker.REBOOT)
+ @scheduler_api.reroute_compute(state_checker.REBOOT)
def reboot(self, context, instance, reboot_type):
"""Reboot the given instance."""
state = {'SOFT': task_states.REBOOTING,
@@ -1164,16 +1162,13 @@ class API(base.Base):
instance['uuid'],
params={'reboot_type': reboot_type})
- @scheduler_api.reroute_compute("rebuild")
+ @check_vm_state(state_checker.REBUILD)
+ @scheduler_api.reroute_compute(state_checker.REBUILD)
def rebuild(self, context, instance, image_href, admin_password,
name=None, metadata=None, files_to_inject=None):
"""Rebuild the given instance with the provided metadata."""
name = name or instance["display_name"]
- if instance["vm_state"] != vm_states.ACTIVE:
- msg = _("Instance must be active to rebuild.")
- raise exception.RebuildRequiresActiveInstance(msg)
-
files_to_inject = files_to_inject or []
metadata = metadata or {}
@@ -1199,7 +1194,8 @@ class API(base.Base):
instance["uuid"],
params=rebuild_params)
- @scheduler_api.reroute_compute("revert_resize")
+ @check_vm_state(state_checker.REVERT_RESIZE)
+ @scheduler_api.reroute_compute(state_checker.REVERT_RESIZE)
def revert_resize(self, context, instance):
"""Reverts a resize, deleting the 'new' instance in the process."""
context = context.elevated()
@@ -1223,7 +1219,8 @@ class API(base.Base):
self.db.migration_update(context, migration_ref['id'],
{'status': 'reverted'})
- @scheduler_api.reroute_compute("confirm_resize")
+ @check_vm_state(state_checker.CONFIRM_RESIZE)
+ @scheduler_api.reroute_compute(state_checker.CONFIRM_RESIZE)
def confirm_resize(self, context, instance):
"""Confirms a migration/resize and deletes the 'old' instance."""
context = context.elevated()
@@ -1249,7 +1246,8 @@ class API(base.Base):
self.db.instance_update(context, instance['uuid'],
{'host': migration_ref['dest_compute'], })
- @scheduler_api.reroute_compute("resize")
+ @check_vm_state(state_checker.RESIZE)
+ @scheduler_api.reroute_compute(state_checker.RESIZE)
def resize(self, context, instance, flavor_id=None):
"""Resize (ie, migrate) a running instance.
@@ -1330,7 +1328,8 @@ class API(base.Base):
# didn't raise so this is the correct zone
self.network_api.add_network_to_project(context, project_id)
- @scheduler_api.reroute_compute("pause")
+ @check_vm_state(state_checker.PAUSE)
+ @scheduler_api.reroute_compute(state_checker.PAUSE)
def pause(self, context, instance):
"""Pause the given instance."""
instance_uuid = instance["uuid"]
@@ -1340,7 +1339,8 @@ class API(base.Base):
task_state=task_states.PAUSING)
self._cast_compute_message('pause_instance', context, instance_uuid)
- @scheduler_api.reroute_compute("unpause")
+ @check_vm_state(state_checker.UNPAUSE)
+ @scheduler_api.reroute_compute(state_checker.UNPAUSE)
def unpause(self, context, instance):
"""Unpause the given instance."""
instance_uuid = instance["uuid"]
@@ -1377,7 +1377,8 @@ class API(base.Base):
"""Retrieve actions for the given instance."""
return self.db.instance_get_actions(context, instance['id'])
- @scheduler_api.reroute_compute("suspend")
+ @check_vm_state(state_checker.SUSPEND)
+ @scheduler_api.reroute_compute(state_checker.SUSPEND)
def suspend(self, context, instance):
"""Suspend the given instance."""
instance_uuid = instance["uuid"]
@@ -1387,7 +1388,8 @@ class API(base.Base):
task_state=task_states.SUSPENDING)
self._cast_compute_message('suspend_instance', context, instance_uuid)
- @scheduler_api.reroute_compute("resume")
+ @check_vm_state(state_checker.RESUME)
+ @scheduler_api.reroute_compute(state_checker.RESUME)
def resume(self, context, instance):
"""Resume the given instance."""
instance_uuid = instance["uuid"]
@@ -1397,7 +1399,8 @@ class API(base.Base):
task_state=task_states.RESUMING)
self._cast_compute_message('resume_instance', context, instance_uuid)
- @scheduler_api.reroute_compute("rescue")
+ @check_vm_state(state_checker.RESCUE)
+ @scheduler_api.reroute_compute(state_checker.RESCUE)
def rescue(self, context, instance, rescue_password=None):
"""Rescue the given instance."""
self.update(context,
@@ -1412,7 +1415,8 @@ class API(base.Base):
instance['uuid'],
params=rescue_params)
- @scheduler_api.reroute_compute("unrescue")
+ @check_vm_state(state_checker.UNRESCUE)
+ @scheduler_api.reroute_compute(state_checker.UNRESCUE)
def unrescue(self, context, instance):
"""Unrescue the given instance."""
self.update(context,
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 38929fb33..3e69c425b 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -297,6 +297,34 @@ class ComputeManager(manager.SchedulerDependentManager):
return (swap, ephemerals, block_device_mapping)
+ def _is_instance_terminated(self, instance_uuid):
+ """Instance in DELETING task state or not found in DB"""
+ context = nova.context.get_admin_context()
+ try:
+ instance = self.db.instance_get_by_uuid(context, instance_uuid)
+ if instance['task_state'] == task_states.DELETING:
+ return True
+ return False
+ except:
+ return True
+
+ def _shutdown_instance_even_if_deleted(self, context, instance_uuid):
+ """Call terminate_instance even for already deleted instances"""
+ LOG.info(_("Going to force the deletion of the vm %(instance_uuid)s, "
+                   "even if it is already deleted") % locals())
+ try:
+ try:
+ self.terminate_instance(context, instance_uuid)
+ except exception.InstanceNotFound:
+ LOG.info(_("Instance %(instance_uuid)s did not exist in the "
+ "DB, but I will shut it down anyway using a special "
+ "context") % locals())
+ ctxt = nova.context.get_admin_context(True)
+ self.terminate_instance(ctxt, instance_uuid)
+ except Exception as ex:
+ LOG.info(_("exception terminating the instance "
+                   "%(instance_uuid)s") % locals())
+
def _run_instance(self, context, instance_uuid,
requested_networks=None,
injected_files=[],
@@ -320,9 +348,14 @@ class ComputeManager(manager.SchedulerDependentManager):
with utils.save_and_reraise_exception():
self._deallocate_network(context, instance)
self._notify_about_instance_usage(instance)
+ if self._is_instance_terminated(instance_uuid):
+                raise exception.InstanceNotFound(instance_id=instance_uuid)
except exception.InstanceNotFound:
LOG.exception(_("Instance %s not found.") % instance_uuid)
- return # assuming the instance was already deleted
+ # assuming the instance was already deleted, run "delete" again
+ # just in case
+ self._shutdown_instance_even_if_deleted(context, instance_uuid)
+ return
except Exception as e:
with utils.save_and_reraise_exception():
self._instance_update(context, instance_uuid,
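The two helpers above close the create/delete race: if the instance was deleted (or marked DELETING) while it was still building, the failed _run_instance path now terminates it again to release networks and block devices instead of returning silently. A minimal sketch (illustration only, assuming it lived in ComputeTestCase in nova/tests/test_compute.py) of exercising the DELETING check:

    def test_is_instance_terminated_when_deleting(self):
        # An instance whose task_state is DELETING counts as terminated.
        instance = self._create_fake_instance(
            params={'task_state': task_states.DELETING})
        self.assertTrue(
            self.compute._is_instance_terminated(instance['uuid']))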
diff --git a/nova/compute/state_checker.py b/nova/compute/state_checker.py
new file mode 100644
index 000000000..9dcdebe8c
--- /dev/null
+++ b/nova/compute/state_checker.py
@@ -0,0 +1,137 @@
+# vim: tabstop=4 shiftwidth=4 softtabstop=4
+
+# Copyright 2010 United States Government as represented by the
+# Administrator of the National Aeronautics and Space Administration.
+# Copyright 2011 Justin Santa Barbara
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from nova import db
+from nova.compute import task_states as ts
+from nova.compute import vm_states as vm
+from nova import context as ctxt
+
+# Function names that run the state check before their execution:
+REBOOT = 'reboot'
+START = 'start'
+REBUILD = 'rebuild'
+STOP = 'stop'
+PAUSE = 'pause'
+BACKUP = 'backup'
+UNPAUSE = 'unpause'
+SUSPEND = 'suspend'
+RESUME = 'resume'
+RESCUE = 'rescue'
+UNRESCUE = 'unrescue'
+SNAPSHOT = 'snapshot'
+RESIZE = 'resize'
+CONFIRM_RESIZE = 'confirm_resize'
+REVERT_RESIZE = 'revert_resize'
+DELETE = 'delete'
+SOFT_DELETE = 'soft_delete'
+FORCE_DELETE = 'force_delete'
+RESTORE = 'restore'
+
+
+# Aux variables to save cpu time, used by blocker dictionaries
+all_ts_but_resize_verify = list(set(ts.get_list()) - set([ts.RESIZE_VERIFY]))
+all_vm_but_act_resc = list(set(vm.get_list()) - set([vm.ACTIVE, vm.RESCUED]))
+all_vm_but_active = list(set(vm.get_list()) - set([vm.ACTIVE]))
+
+# Call blocked if the vm task_state is found in the corresponding list
+block_for_task_state = {
+ REBOOT: all_ts_but_resize_verify,
+ START: all_ts_but_resize_verify,
+ REBUILD: all_ts_but_resize_verify,
+ PAUSE: all_ts_but_resize_verify,
+ STOP: all_ts_but_resize_verify,
+ UNPAUSE: all_ts_but_resize_verify,
+ SUSPEND: all_ts_but_resize_verify,
+ RESUME: all_ts_but_resize_verify,
+ RESCUE: all_ts_but_resize_verify,
+ UNRESCUE: all_ts_but_resize_verify,
+ SNAPSHOT: all_ts_but_resize_verify,
+ BACKUP: all_ts_but_resize_verify,
+ RESIZE: all_ts_but_resize_verify,
+ CONFIRM_RESIZE: all_ts_but_resize_verify,
+ REVERT_RESIZE: all_ts_but_resize_verify}
+
+# Call blocked if the vm vm_state is found in the corresponding list
+block_for_vm_state = {
+ REBOOT: all_vm_but_act_resc,
+ START: list(set(vm.get_list()) - set([vm.STOPPED])),
+ REBUILD: all_vm_but_active,
+ PAUSE: all_vm_but_act_resc,
+ STOP: all_vm_but_act_resc,
+ UNPAUSE: list(set(vm.get_list()) - set([vm.PAUSED])),
+ SUSPEND: all_vm_but_act_resc,
+ RESUME: list(set(vm.get_list()) - set([vm.SUSPENDED])),
+ RESCUE: list(set(vm.get_list()) - set([vm.ACTIVE, vm.STOPPED])),
+ UNRESCUE: list(set(vm.get_list()) - set([vm.ACTIVE, vm.RESCUED])),
+ SNAPSHOT: all_vm_but_active,
+ BACKUP: all_vm_but_active,
+ RESIZE: all_vm_but_active,
+ CONFIRM_RESIZE: all_vm_but_active,
+ REVERT_RESIZE: all_vm_but_active}
+
+# Call blocked if the combination of vm_state, power_state and task_state is
+# found in the corresponding list
+block_for_combination = {
+ CONFIRM_RESIZE: [{'vm_state': vm.ACTIVE, 'task_state': None}],
+ REVERT_RESIZE: [{'vm_state': vm.ACTIVE, 'task_state': None}]}
+
+
+def is_blocked(method_name, context, instance_ref):
+ """
+ Is the method blocked for the VM state?
+
+    This method returns True if the state of the vm is found
+    in the blocked dictionaries for the method.
+ """
+ if instance_ref['task_state'] in block_for_task_state.get(method_name, ()):
+ return True
+ if instance_ref['vm_state'] in block_for_vm_state.get(method_name, ()):
+ return True
+ if method_name in block_for_combination:
+ return _is_combination_blocked(method_name, instance_ref)
+ # Allow the method if not found in any list
+ return False
+
+
+def _is_combination_blocked(method_name, instance_ref):
+ """
+ Is the method blocked according to the blocked_combination dictionary?
+
+    To be blocked, every key present in a blocking dictionary needs to match
+    the corresponding state of the instance. If a key is not present in a
+    dictionary, the dictionary applies to any value of that state.
+ """
+ for blocked_element in block_for_combination[method_name]:
+ # Check power state
+ if 'power_state' in blocked_element and instance_ref['power_state']\
+ != blocked_element['power_state']:
+ continue
+ # Check vm state
+ if 'vm_state' in blocked_element and instance_ref['vm_state']\
+ != blocked_element['vm_state']:
+ continue
+ # Check task state
+ if 'task_state' in blocked_element and instance_ref['task_state']\
+ != blocked_element['task_state']:
+ continue
+ return True
+ # After analyzing all the dictionaries for the method, none tells us to
+ # block the function
+ return False
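A quick usage sketch of the new checker (illustration only, not part of the patch); the dict stands in for an instance row and only the states consulted by is_blocked are filled in:

    from nova.compute import state_checker
    from nova.compute import task_states
    from nova.compute import vm_states

    instance = {'vm_state': vm_states.BUILDING,
                'task_state': task_states.SPAWNING}
    # reboot is blocked while the instance is still building/spawning...
    assert state_checker.is_blocked(state_checker.REBOOT, None, instance)
    # ...but delete does not appear in any blocking dictionary, so it is allowed.
    assert not state_checker.is_blocked(state_checker.DELETE, None, instance)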
diff --git a/nova/compute/task_states.py b/nova/compute/task_states.py
index c6016b509..3765fc79c 100644
--- a/nova/compute/task_states.py
+++ b/nova/compute/task_states.py
@@ -60,3 +60,14 @@ UNRESCUING = 'unrescuing'
DELETING = 'deleting'
STOPPING = 'stopping'
STARTING = 'starting'
+
+
+def get_list():
+ """Returns a list of all the possible task_states"""
+ return [SCHEDULING, BLOCK_DEVICE_MAPPING, NETWORKING, SPAWNING,
+ IMAGE_SNAPSHOT, IMAGE_BACKUP, UPDATING_PASSWORD, RESIZE_PREP,
+ RESIZE_MIGRATING, RESIZE_MIGRATED, RESIZE_FINISH, RESIZE_REVERTING,
+ RESIZE_CONFIRMING, RESIZE_VERIFY, REBUILDING, REBOOTING,
+ REBOOTING_HARD, PAUSING, UNPAUSING, SUSPENDING, RESUMING,
+ POWERING_OFF, POWERING_ON, RESCUING, UNRESCUING, DELETING,
+ STOPPING, STARTING]
diff --git a/nova/compute/vm_states.py b/nova/compute/vm_states.py
index f219bf7f4..4d3d524c5 100644
--- a/nova/compute/vm_states.py
+++ b/nova/compute/vm_states.py
@@ -38,3 +38,9 @@ MIGRATING = 'migrating'
RESIZING = 'resizing'
ERROR = 'error'
+
+
+def get_list():
+ """Returns a list of all the possible vm_states"""
+ return [ACTIVE, BUILDING, REBUILDING, PAUSED, SUSPENDED, RESCUED,
+ DELETED, STOPPED, SOFT_DELETE, MIGRATING, RESIZING, ERROR]
diff --git a/nova/exception.py b/nova/exception.py
index 8d0371b96..d0ccfb627 100644
--- a/nova/exception.py
+++ b/nova/exception.py
@@ -250,6 +250,11 @@ class InvalidParameterValue(Invalid):
message = _("%(err)s")
+class InstanceInvalidState(Invalid):
+ message = _("Instance %(instance_uuid)s in state %(state)s. Cannot "
+ "%(method)s while the instance is in this state.")
+
+
class InstanceNotRunning(Invalid):
message = _("Instance %(instance_id)s is not running.")
diff --git a/nova/scheduler/manager.py b/nova/scheduler/manager.py
index 5aadc6e82..38dda9931 100644
--- a/nova/scheduler/manager.py
+++ b/nova/scheduler/manager.py
@@ -25,12 +25,13 @@ import functools
from nova.compute import vm_states
from nova import db
+from nova import exception
from nova import flags
from nova import log as logging
from nova import manager
from nova import rpc
-from nova import utils
from nova.scheduler import zone_manager
+from nova import utils
LOG = logging.getLogger('nova.scheduler.manager')
FLAGS = flags.FLAGS
@@ -101,17 +102,24 @@ class SchedulerManager(manager.Manager):
# Scheduler methods are responsible for casting.
try:
return real_meth(*args, **kwargs)
- except Exception as e:
- # If this affects a particular instance, move that
- # instance to the ERROR state
- if 'instance_id' in kwargs:
- instance_id = kwargs['instance_id']
- LOG.warning(_("Failed to %(driver_method)s: %(e)s. "
- "Putting instance %(instance_id)s into "
+ except exception.NoValidHost as ex:
+ self._set_instance_error(method, context, ex, *args, **kwargs)
+ except Exception as ex:
+ with utils.save_and_reraise_exception():
+ self._set_instance_error(method, context, ex, *args, **kwargs)
+
+    # NOTE (David Subiros) : If the exception is raised during the run_instance
+ # method, the DB record probably does not exist yet.
+ def _set_instance_error(self, method, context, ex, *args, **kwargs):
+ """Sets VM to Error state"""
+ LOG.warning(_("Failed to schedule_%(method)s: %(ex)s") % locals())
+ if method == "start_instance" or method == "run_instance":
+ instance_id = kwargs['instance_id']
+ if instance_id:
+ LOG.warning(_("Setting instance %(instance_id)s to "
"ERROR state.") % locals())
- db.instance_update(context, kwargs['instance_id'],
- dict(vm_state=vm_states.ERROR))
- raise
+ db.instance_update(context, instance_id,
+ {'vm_state': vm_states.ERROR})
# NOTE (masumotok) : This method should be moved to nova.api.ec2.admin.
# Based on bexar design summit discussion,
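The net effect of the rewritten error path, as a sketch (illustration only; `ctxt` and `instance` are placeholders, and the driver is assumed to raise NoValidHost because no host fits): NoValidHost is swallowed after the instance is flagged, while any other exception is re-raised once the instance has been flagged.

    mgr = manager.SchedulerManager()
    mgr.run_instance(ctxt, 'compute', instance_id=instance['id'])
    # No exception reaches the caller; the instance is simply marked failed.
    assert db.instance_get(ctxt, instance['id'])['vm_state'] == vm_states.ERROR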
diff --git a/nova/tests/api/ec2/test_cloud.py b/nova/tests/api/ec2/test_cloud.py
index 78e7f96fe..039282147 100644
--- a/nova/tests/api/ec2/test_cloud.py
+++ b/nova/tests/api/ec2/test_cloud.py
@@ -1418,9 +1418,10 @@ class CloudTestCase(test.TestCase):
'max_count': 1, }
instance_id = self._run_instance(**kwargs)
- # a running instance can't be started. It is just ignored.
- result = self.cloud.start_instances(self.context, [instance_id])
- self.assertTrue(result)
+ # a running instance can't be started.
+ self.assertRaises(exception.InstanceInvalidState,
+ self.cloud.start_instances,
+ self.context, [instance_id])
result = self.cloud.stop_instances(self.context, [instance_id])
self.assertTrue(result)
@@ -1469,9 +1470,10 @@ class CloudTestCase(test.TestCase):
'max_count': 1, }
instance_id = self._run_instance(**kwargs)
- # a running instance can't be started. It is just ignored.
- result = self.cloud.start_instances(self.context, [instance_id])
- self.assertTrue(result)
+ # a running instance can't be started.
+ self.assertRaises(exception.InstanceInvalidState,
+ self.cloud.start_instances,
+ self.context, [instance_id])
result = self.cloud.terminate_instances(self.context, [instance_id])
self.assertTrue(result)
@@ -1483,9 +1485,10 @@ class CloudTestCase(test.TestCase):
'max_count': 1, }
instance_id = self._run_instance(**kwargs)
- # a running instance can't be started. It is just ignored.
- result = self.cloud.start_instances(self.context, [instance_id])
- self.assertTrue(result)
+ # a running instance can't be started.
+ self.assertRaises(exception.InstanceInvalidState,
+ self.cloud.start_instances,
+ self.context, [instance_id])
result = self.cloud.reboot_instances(self.context, [instance_id])
self.assertTrue(result)
diff --git a/nova/tests/api/openstack/v2/test_servers.py b/nova/tests/api/openstack/v2/test_servers.py
index f6f78609d..258928eae 100644
--- a/nova/tests/api/openstack/v2/test_servers.py
+++ b/nova/tests/api/openstack/v2/test_servers.py
@@ -1182,6 +1182,11 @@ class ServersControllerTest(test.TestCase):
body = dict(reboot=dict(type="HARD"))
req = fakes.HTTPRequest.blank(
'/v2/fake/servers/%s/action' % FAKE_UUID)
+ # Assume the instance is in ACTIVE state before calling reboot
+ self.stubs.Set(nova.db, 'instance_get',
+ return_server_with_state(vm_states.ACTIVE))
+ self.stubs.Set(nova.db, 'instance_get_by_uuid',
+ return_server_with_state(vm_states.ACTIVE))
self.controller.action(req, FAKE_UUID, body)
self.test_server_actions()
diff --git a/nova/tests/scheduler/test_scheduler.py b/nova/tests/scheduler/test_scheduler.py
index 32ff67f1d..c0fce0fe3 100644
--- a/nova/tests/scheduler/test_scheduler.py
+++ b/nova/tests/scheduler/test_scheduler.py
@@ -21,6 +21,7 @@ Tests For Scheduler
import datetime
import mox
+import stubout
from novaclient import v1_1 as novaclient
from novaclient import exceptions as novaclient_exceptions
@@ -38,9 +39,9 @@ from nova.scheduler import driver
from nova.scheduler import manager
from nova.scheduler.simple import SimpleScheduler
from nova.compute import power_state
+from nova.compute import task_states
from nova.compute import vm_states
-
FLAGS = flags.FLAGS
flags.DECLARE('max_cores', 'nova.scheduler.simple')
flags.DECLARE('stub_network', 'nova.compute.manager')
@@ -143,6 +144,10 @@ class SchedulerTestCase(test.TestCase):
driver = 'nova.tests.scheduler.test_scheduler.TestDriver'
self.flags(scheduler_driver=driver)
+ def tearDown(self):
+ self.stubs.UnsetAll()
+ super(SchedulerTestCase, self).tearDown()
+
def _create_compute_service(self):
"""Create compute-manager(ComputeNode and Service record)."""
ctxt = context.get_admin_context()
@@ -205,6 +210,41 @@ class SchedulerTestCase(test.TestCase):
return False
return True
+ def _assert_state(self, state_dict):
+ """assert the instance is in the state defined by state_dict"""
+ instances = db.instance_get_all(context.get_admin_context())
+ self.assertEqual(len(instances), 1)
+
+ if 'vm_state' in state_dict:
+ self.assertEqual(state_dict['vm_state'], instances[0]['vm_state'])
+ if 'task_state' in state_dict:
+ self.assertEqual(state_dict['task_state'],
+ instances[0]['task_state'])
+ if 'power_state' in state_dict:
+ self.assertEqual(state_dict['power_state'],
+ instances[0]['power_state'])
+
+ def test_no_valid_host_exception_on_start(self):
+ """check the vm goes to ERROR state if the scheduler fails.
+
+ If the scheduler driver cannot allocate a host for the VM during
+ start_instance, it will raise a NoValidHost exception. In this
+ scenario, we have to make sure that the VM state is set to ERROR.
+ """
+ def NoValidHost_raiser(context, topic, *args, **kwargs):
+ raise exception.NoValidHost(_("Test NoValidHost exception"))
+ scheduler = manager.SchedulerManager()
+ ins_ref = _create_instance(task_state=task_states.STARTING,
+ vm_state=vm_states.STOPPED)
+ self.stubs = stubout.StubOutForTesting()
+ self.stubs.Set(TestDriver, 'schedule', NoValidHost_raiser)
+ self.mox.StubOutWithMock(rpc, 'cast', use_mock_anything=True)
+ ctxt = context.get_admin_context()
+ scheduler.start_instance(ctxt, 'topic', instance_id=ins_ref['id'])
+ # assert that the instance goes to ERROR state
+ self._assert_state({'vm_state': vm_states.ERROR,
+ 'task_state': task_states.STARTING})
+
def test_show_host_resources_no_project(self):
"""No instance are running on the given host."""
@@ -247,21 +287,6 @@ class SchedulerTestCase(test.TestCase):
db.instance_destroy(ctxt, i_ref1['id'])
db.instance_destroy(ctxt, i_ref2['id'])
- def test_exception_puts_instance_in_error_state(self):
- """Test that an exception from the scheduler puts an instance
- in the ERROR state."""
-
- scheduler = manager.SchedulerManager()
- ctxt = context.get_admin_context()
- inst = _create_instance()
- self.assertRaises(Exception, scheduler._schedule,
- 'failing_method', ctxt, 'scheduler',
- instance_id=inst['uuid'])
-
- # Refresh the instance
- inst = db.instance_get(ctxt, inst['id'])
- self.assertEqual(inst['vm_state'], vm_states.ERROR)
-
class SimpleDriverTestCase(test.TestCase):
"""Test case for simple driver"""
diff --git a/nova/tests/test_compute.py b/nova/tests/test_compute.py
index a0517934e..abe21db91 100644
--- a/nova/tests/test_compute.py
+++ b/nova/tests/test_compute.py
@@ -19,7 +19,6 @@
"""
Tests For Compute
"""
-
from copy import copy
from webob import exc
@@ -30,6 +29,7 @@ from nova import compute
from nova.compute import instance_types
from nova.compute import manager as compute_manager
from nova.compute import power_state
+from nova.compute import state_checker
from nova.compute import task_states
from nova.compute import vm_states
from nova import context
@@ -111,6 +111,8 @@ class BaseTestCase(test.TestCase):
self.project_id = 'fake'
self.context = context.RequestContext(self.user_id, self.project_id)
test_notifier.NOTIFICATIONS = []
+ self.mox = mox.Mox()
+ self.total_waits = 0
def fake_show(meh, context, id):
return {'id': 1, 'min_disk': None, 'min_ram': None,
@@ -120,7 +122,14 @@ class BaseTestCase(test.TestCase):
self.stubs.Set(rpc, 'call', rpc_call_wrapper)
self.stubs.Set(rpc, 'cast', rpc_cast_wrapper)
- def _create_fake_instance(self, params=None):
+ def tearDown(self):
+ self.mox.UnsetStubs()
+ instances = db.instance_get_all(self.context.elevated())
+ for instance in instances:
+ db.instance_destroy(self.context.elevated(), instance['id'])
+ super(BaseTestCase, self).tearDown()
+
+ def _create_fake_instance(self, params=None, type_name='m1.tiny'):
"""Create a test instance"""
if not params:
params = {}
@@ -131,12 +140,16 @@ class BaseTestCase(test.TestCase):
inst['launch_time'] = '10'
inst['user_id'] = self.user_id
inst['project_id'] = self.project_id
- type_id = instance_types.get_instance_type_by_name('m1.tiny')['id']
+ type_id = instance_types.get_instance_type_by_name(type_name)['id']
inst['instance_type_id'] = type_id
inst['ami_launch_index'] = 0
inst.update(params)
return db.instance_create(self.context, inst)
+ def _create_instance(self, params=None, type_name='m1.tiny'):
+ """Create a test instance. Returns uuid"""
+ return self._create_fake_instance(params, type_name=type_name)['uuid']
+
def _create_instance_type(self, params=None):
"""Create a test instance type"""
if not params:
@@ -195,6 +208,77 @@ class ComputeTestCase(BaseTestCase):
finally:
db.instance_destroy(self.context, instance['id'])
+ def _assert_state(self, state_dict):
+ """assert the instance is in the state defined by state_dict"""
+ instances = db.instance_get_all(context.get_admin_context())
+ self.assertEqual(len(instances), 1)
+
+ if 'vm_state' in state_dict:
+ self.assertEqual(state_dict['vm_state'], instances[0]['vm_state'])
+ if 'task_state' in state_dict:
+ self.assertEqual(state_dict['task_state'],
+ instances[0]['task_state'])
+ if 'power_state' in state_dict:
+ self.assertEqual(state_dict['power_state'],
+ instances[0]['power_state'])
+
+ def test_fail_to_schedule_persists(self):
+ """check the persistence of the ERROR(scheduling) state"""
+ self._create_instance(params={'vm_state': vm_states.ERROR,
+ 'task_state': task_states.SCHEDULING})
+ #check state is failed even after the periodic poll
+ error_list = self.compute.periodic_tasks(context.get_admin_context())
+ self._assert_state({'vm_state': vm_states.ERROR,
+ 'task_state': task_states.SCHEDULING})
+
+ def test_run_instance_setup_block_device_mapping_fail(self):
+ """ block device mapping failure test.
+
+ Make sure that when there is a block device mapping problem,
+ the instance goes to ERROR state, keeping the task state
+ """
+ def fake(*args, **kwargs):
+ raise Exception("Failed to block device mapping")
+ self.stubs.Set(nova.compute.manager.ComputeManager,
+ '_setup_block_device_mapping', fake)
+ instance_uuid = self._create_instance()
+ self.assertRaises(Exception, self.compute.run_instance,
+ self.context, instance_uuid)
+ #check state is failed even after the periodic poll
+ self._assert_state({'vm_state': vm_states.ERROR,
+ 'task_state': task_states.BLOCK_DEVICE_MAPPING})
+ error_list = self.compute.periodic_tasks(context.get_admin_context())
+ self._assert_state({'vm_state': vm_states.ERROR,
+ 'task_state': task_states.BLOCK_DEVICE_MAPPING})
+
+ def test_run_instance_spawn_fail(self):
+ """ spawn failure test.
+
+ Make sure that when there is a spawning problem,
+ the instance goes to ERROR state, keeping the task state"""
+ def fake(*args, **kwargs):
+ raise Exception("Failed to spawn")
+ self.stubs.Set(self.compute.driver, 'spawn', fake)
+ instance_uuid = self._create_instance()
+ self.assertRaises(Exception, self.compute.run_instance,
+ self.context, instance_uuid)
+ #check state is failed even after the periodic poll
+ self._assert_state({'vm_state': vm_states.ERROR,
+ 'task_state': task_states.SPAWNING})
+ error_list = self.compute.periodic_tasks(context.get_admin_context())
+ self._assert_state({'vm_state': vm_states.ERROR,
+ 'task_state': task_states.SPAWNING})
+
+ def test_can_terminate_on_error_state(self):
+ """Make sure that the instance can be terminated in ERROR state"""
+ elevated = context.get_admin_context()
+ #check failed to schedule --> terminate
+ instance_uuid = self._create_instance(params={'vm_state':
+ vm_states.ERROR})
+ self.compute.terminate_instance(self.context, instance_uuid)
+ self.assertRaises(exception.InstanceNotFound, db.instance_get_by_uuid,
+ elevated, instance_uuid)
+
def test_run_terminate(self):
"""Make sure it is possible to run and terminate instance"""
instance = self._create_fake_instance()
@@ -1157,6 +1241,108 @@ class ComputeAPITestCase(BaseTestCase):
'properties': {'kernel_id': 1, 'ramdisk_id': 1},
}
+ def test_check_vm_state_filtered_function(self):
+ """Test the check_vm_state mechanism for filtered functions.
+
+        Checks that filtered_function is blocked in the right states, and
+        only when the api_check_vm_states flag is set to True.
+
+        Note that filtered_function takes the same number of arguments
+        as the real functions that are decorated in the compute API.
+ """
+ @compute.api.check_vm_state('filtered_function')
+ def filtered_function(api, context, instance_ref):
+ LOG.debug("filtered_function executed")
+ return True
+
+ def filtered_assume_right_state(instance_ref):
+ self.flags(api_check_vm_states=True)
+ self.assertTrue(filtered_function(self.compute_api,
+ self.context, instance_ref))
+
+ def filtered_assume_wrong_state(instance_ref):
+ self.flags(api_check_vm_states=True)
+ self.assertRaises(exception.InstanceInvalidState,
+ filtered_function, self.compute_api,
+ self.context, instance_ref)
+ self.flags(api_check_vm_states=False)
+ self.assertTrue(filtered_function(self.compute_api,
+ self.context, instance_ref))
+
+ # check that the filtered_function is correctly filtered
+ self._execute_allowed_and_blocked('filtered_function',
+ filtered_assume_right_state,
+ filtered_assume_wrong_state)
+
+ def test_check_vm_state_non_filtered_function(self):
+ """Test the check_vm_state mechanism for non filtered functions.
+
+        Checks that a function that is decorated with check_vm_state, but
+        is not defined in any blocked dictionary, is always executed.
+ """
+ @compute.api.check_vm_state('non_filtered_function')
+ def non_filtered_function(api, context, instance_ref):
+ LOG.debug("non_filtered_function executed")
+ return True
+
+ def non_filtered_assume_executed(instance_ref):
+ self.flags(api_check_vm_states=True)
+ self.assertTrue(non_filtered_function(self.compute_api,
+ self.context, instance_ref))
+
+ # check that the non_filtered_function is never filtered
+ self._execute_allowed_and_blocked('non_filtered_function',
+ non_filtered_assume_executed,
+ non_filtered_assume_executed)
+
+ def _execute_allowed_and_blocked(self, func_name, f_allowed, f_blocked):
+ """Execute f_allowed and f_blocked functions for all the scenarios.
+
+        Picks an allowed vm_state, a blocked vm_state, an allowed task_state,
+        and a blocked task_state for the function defined by func_name. Then
+        it executes f_allowed or f_blocked accordingly, passing a newly
+        created instance ref as parameter. These functions have to run the
+        func_name function and assert the expected result.
+ """
+
+ # define blocked and allowed states
+ blocked_tsk = task_states.SCHEDULING
+ ok_task = task_states.NETWORKING
+ blocked_vm = vm_states.BUILDING
+ ok_vm = vm_states.RESCUED
+ blocked_comb = {'power_state': power_state.RUNNING,
+ 'vm_state': vm_states.ACTIVE, 'task_state': None}
+ ok_comb = {'power_state': power_state.RUNNING,
+ 'vm_state': vm_states.PAUSED, 'task_state': None}
+
+        # To guarantee 100% test coverage, we create fake block lists.
+ fake_block_for_task_state = {'filtered_function': [blocked_tsk]}
+
+ fake_block_for_vm_state = {'filtered_function': [blocked_vm]}
+
+ fake_block_for_combination = {'filtered_function': [blocked_comb]}
+
+ self.stubs.Set(nova.compute.state_checker, 'block_for_task_state',
+ fake_block_for_task_state)
+ self.stubs.Set(nova.compute.state_checker, 'block_for_vm_state',
+ fake_block_for_vm_state)
+ self.stubs.Set(nova.compute.state_checker, 'block_for_combination',
+ fake_block_for_combination)
+
+ i_ref = self._create_fake_instance(params={'task_state': blocked_tsk})
+ f_blocked(i_ref)
+ i_ref = self._create_fake_instance(params={'task_state': ok_task})
+ f_allowed(i_ref)
+ i_ref = self._create_fake_instance(params={'vm_state': blocked_vm})
+ f_blocked(i_ref)
+ i_ref = self._create_fake_instance(params={'vm_state': ok_vm})
+ f_allowed(i_ref)
+ i_ref = self._create_fake_instance(params=blocked_comb)
+ f_blocked(i_ref)
+ i_ref = self._create_fake_instance(params=ok_comb)
+ f_allowed(i_ref)
+
def test_create_with_too_little_ram(self):
"""Test an instance type with too little memory"""
@@ -1416,10 +1602,14 @@ class ComputeAPITestCase(BaseTestCase):
db.instance_destroy(self.context, instance_uuid)
def test_resume(self):
- """Ensure instance can be resumed"""
+ """Ensure instance can be resumed (if suspended)"""
instance = self._create_fake_instance()
instance_uuid = instance['uuid']
+ instance_id = instance['id']
self.compute.run_instance(self.context, instance_uuid )
+ db.instance_update(self.context, instance_id,
+ {'vm_state': vm_states.SUSPENDED})
+ instance = db.instance_get(self.context, instance_id)
self.assertEqual(instance['task_state'], None)
@@ -1578,6 +1768,7 @@ class ComputeAPITestCase(BaseTestCase):
params = {'vm_state': vm_states.RESCUED, 'task_state': None}
db.instance_update(self.context, instance_uuid, params)
+ instance = db.instance_get_by_uuid(self.context, instance_uuid)
self.compute_api.unrescue(self.context, instance)
instance = db.instance_get_by_uuid(self.context, instance_uuid)
@@ -1625,7 +1816,7 @@ class ComputeAPITestCase(BaseTestCase):
db.instance_update(self.context, instance_uuid, instance_values)
instance = self.compute_api.get(self.context, instance_uuid)
- self.assertRaises(exception.InstanceBackingUp,
+ self.assertRaises(exception.InstanceInvalidState,
self.compute_api.backup,
self.context,
instance,
@@ -1643,7 +1834,7 @@ class ComputeAPITestCase(BaseTestCase):
db.instance_update(self.context, instance_uuid, instance_values)
instance = self.compute_api.get(self.context, instance_uuid)
- self.assertRaises(exception.InstanceSnapshotting,
+ self.assertRaises(exception.InstanceInvalidState,
self.compute_api.snapshot,
self.context,
instance,
@@ -1663,6 +1854,11 @@ class ComputeAPITestCase(BaseTestCase):
migration_ref = db.migration_create(context,
{'instance_uuid': instance['uuid'],
'status': 'finished'})
+ # set the state that the instance gets when resize finishes
+ db.instance_update(self.context, instance['uuid'],
+ {'task_state': task_states.RESIZE_VERIFY,
+ 'vm_state': vm_states.ACTIVE})
+ instance = db.instance_get_by_uuid(context, instance['uuid'])
self.compute_api.confirm_resize(context, instance)
self.compute.terminate_instance(context, instance['uuid'])