summaryrefslogtreecommitdiffstats
path: root/nova/compute
diff options
context:
space:
mode:
authorDavid Subiros <david.perez5@hp.com>2011-11-16 17:31:29 +0000
committerVishvananda Ishaya <vishvananda@gmail.com>2011-12-12 17:27:03 -0800
commitff753cd608973f5d72a80aef0f9fb8a646fccc3f (patch)
tree0b51b6b1263d29fef254fb1e61863fcc55b56c10 /nova/compute
parentd3b75b75aa937380f04b5320b70c8673821af203 (diff)
Vm state management and error states
This implements the nova-vm-state-management blueprint. It adds the following functionality: - Filters compute API calls according to the state of the VM (defined in compute/state_checker). - Sets the error state if the scheduler cannot allocate the VM on any host. - Handles create/delete concurrency in the compute manager. Change-Id: Ie6d016b7d4781f70bb5967f204fa88a6412bd727
Diffstat (limited to 'nova/compute')
-rw-r--r--nova/compute/api.py118
-rw-r--r--nova/compute/manager.py35
-rw-r--r--nova/compute/state_checker.py137
-rw-r--r--nova/compute/task_states.py11
-rw-r--r--nova/compute/vm_states.py6
5 files changed, 249 insertions, 58 deletions
diff --git a/nova/compute/api.py b/nova/compute/api.py
index f65b5cead..89c4399f1 100644
--- a/nova/compute/api.py
+++ b/nova/compute/api.py
@@ -35,6 +35,7 @@ from nova import utils
from nova import volume
from nova.compute import instance_types
from nova.compute import power_state
+from nova.compute import state_checker
from nova.compute import task_states
from nova.compute import vm_states
from nova.scheduler import api as scheduler_api
@@ -49,25 +50,8 @@ flags.DECLARE('enable_zone_routing', 'nova.scheduler.api')
flags.DECLARE('vncproxy_topic', 'nova.vnc')
flags.DEFINE_integer('find_host_timeout', 30,
'Timeout after NN seconds when looking for a host.')
-
-
-def _is_able_to_shutdown(instance):
- vm_state = instance["vm_state"]
- instance_uuid = instance["uuid"]
-
- valid_shutdown_states = [
- vm_states.ACTIVE,
- vm_states.REBUILDING,
- vm_states.BUILDING,
- vm_states.ERROR,
- ]
-
- if vm_state not in valid_shutdown_states:
- LOG.warn(_("Instance %(instance_uuid)s cannot be shutdown from "
- "its current state: %(vm_state)s.") % locals())
- return False
-
- return True
+# When true, the check_vm_state wrapper consults state_checker.is_blocked()
+# before running a wrapped API method; when false the check is skipped.
+flags.DEFINE_boolean('api_check_vm_states', True,
+ 'Filter calls by vm state')
def _is_queued_delete(instance):
@@ -83,6 +67,27 @@ def _is_queued_delete(instance):
return True
+class check_vm_state(object):
+    """Decorator that guards a compute API method by instance state.
+
+    Before calling the wrapped method, state_checker.is_blocked() is asked
+    whether ``method_name`` is blocked for the instance. If it is,
+    exception.InstanceInvalidState is raised and the method is not called.
+    The check is skipped when the api_check_vm_states flag is false.
+    """
+
+    def __init__(self, method_name):
+        """Store the method name that is handed to state_checker."""
+        self.method_name = method_name
+
+    def __call__(self, f):
+        """Return ``f`` wrapped with the state check."""
+        # Imported here so the decorator does not depend on a module-level
+        # import being present.
+        import functools
+
+        # wraps() keeps the decorated method's __name__ and docstring.
+        @functools.wraps(f)
+        def _state_checker_wrap(api, context, instance, *args, **kw):
+            if FLAGS.api_check_vm_states and state_checker.is_blocked(
+                    self.method_name, context, instance):
+                raise exception.InstanceInvalidState(
+                    instance_uuid=instance['uuid'], method=self.method_name)
+            return f(api, context, instance, *args, **kw)
+        return _state_checker_wrap
+
+
class API(base.Base):
"""API for interacting with the compute manager."""
@@ -766,15 +771,13 @@ class API(base.Base):
rv = self.db.instance_update(context, instance["id"], kwargs)
return dict(rv.iteritems())
- @scheduler_api.reroute_compute("soft_delete")
+ @check_vm_state(state_checker.SOFT_DELETE)
+ @scheduler_api.reroute_compute(state_checker.SOFT_DELETE)
def soft_delete(self, context, instance):
"""Terminate an instance."""
instance_uuid = instance["uuid"]
LOG.debug(_("Going to try to soft delete %s"), instance_uuid)
- if not _is_able_to_shutdown(instance):
- return
-
# NOTE(jerdfelt): The compute daemon handles reclaiming instances
# that are in soft delete. If there is no host assigned, there is
# no daemon to reclaim, so delete it immediately.
@@ -806,20 +809,18 @@ class API(base.Base):
else:
self.db.instance_destroy(context, instance['id'])
- @scheduler_api.reroute_compute("delete")
+ @check_vm_state(state_checker.DELETE)
+ @scheduler_api.reroute_compute(state_checker.DELETE)
def delete(self, context, instance):
"""Terminate an instance."""
LOG.debug(_("Going to try to terminate %s"), instance["uuid"])
- if not _is_able_to_shutdown(instance):
- return
-
self._delete(context, instance)
- @scheduler_api.reroute_compute("restore")
+ @check_vm_state(state_checker.RESTORE)
+ @scheduler_api.reroute_compute(state_checker.RESTORE)
def restore(self, context, instance):
"""Restore a previously deleted (but not reclaimed) instance."""
-
if not _is_queued_delete(instance):
return
@@ -837,7 +838,8 @@ class API(base.Base):
self._cast_compute_message('power_on_instance', context,
instance['uuid'], host)
- @scheduler_api.reroute_compute("force_delete")
+ @check_vm_state(state_checker.FORCE_DELETE)
+ @scheduler_api.reroute_compute(state_checker.FORCE_DELETE)
def force_delete(self, context, instance):
"""Force delete a previously deleted (but not reclaimed) instance."""
@@ -846,15 +848,13 @@ class API(base.Base):
self._delete(context, instance)
- @scheduler_api.reroute_compute("stop")
+ @check_vm_state(state_checker.STOP)
+ @scheduler_api.reroute_compute(state_checker.STOP)
def stop(self, context, instance):
"""Stop an instance."""
instance_uuid = instance["uuid"]
LOG.debug(_("Going to try to stop %s"), instance_uuid)
- if not _is_able_to_shutdown(instance):
- return
-
self.update(context,
instance,
vm_state=vm_states.ACTIVE,
@@ -867,6 +867,7 @@ class API(base.Base):
self._cast_compute_message('stop_instance', context,
instance_uuid, host)
+ @check_vm_state(state_checker.START)
def start(self, context, instance):
"""Start an instance."""
vm_state = instance["vm_state"]
@@ -1078,7 +1079,8 @@ class API(base.Base):
raise exception.Error(_("Unable to find host for Instance %s")
% instance_uuid)
- @scheduler_api.reroute_compute("backup")
+ @check_vm_state(state_checker.BACKUP)
+ @scheduler_api.reroute_compute(state_checker.BACKUP)
def backup(self, context, instance, name, backup_type, rotation,
extra_properties=None):
"""Backup the given instance
@@ -1095,7 +1097,8 @@ class API(base.Base):
extra_properties=extra_properties)
return recv_meta
- @scheduler_api.reroute_compute("snapshot")
+ @check_vm_state(state_checker.SNAPSHOT)
+ @scheduler_api.reroute_compute(state_checker.SNAPSHOT)
def snapshot(self, context, instance, name, extra_properties=None):
"""Snapshot the given instance.
@@ -1125,12 +1128,6 @@ class API(base.Base):
task_state = instance["task_state"]
instance_uuid = instance['uuid']
- if task_state == task_states.IMAGE_BACKUP:
- raise exception.InstanceBackingUp(instance_uuid=instance_uuid)
-
- if task_state == task_states.IMAGE_SNAPSHOT:
- raise exception.InstanceSnapshotting(instance_uuid=instance_uuid)
-
properties = {
'instance_uuid': instance_uuid,
'user_id': str(context.user_id),
@@ -1150,7 +1147,8 @@ class API(base.Base):
params=params)
return recv_meta
- @scheduler_api.reroute_compute("reboot")
+ @check_vm_state(state_checker.REBOOT)
+ @scheduler_api.reroute_compute(state_checker.REBOOT)
def reboot(self, context, instance, reboot_type):
"""Reboot the given instance."""
state = {'SOFT': task_states.REBOOTING,
@@ -1164,16 +1162,13 @@ class API(base.Base):
instance['uuid'],
params={'reboot_type': reboot_type})
- @scheduler_api.reroute_compute("rebuild")
+ @check_vm_state(state_checker.REBUILD)
+ @scheduler_api.reroute_compute(state_checker.REBUILD)
def rebuild(self, context, instance, image_href, admin_password,
name=None, metadata=None, files_to_inject=None):
"""Rebuild the given instance with the provided metadata."""
name = name or instance["display_name"]
- if instance["vm_state"] != vm_states.ACTIVE:
- msg = _("Instance must be active to rebuild.")
- raise exception.RebuildRequiresActiveInstance(msg)
-
files_to_inject = files_to_inject or []
metadata = metadata or {}
@@ -1199,7 +1194,8 @@ class API(base.Base):
instance["uuid"],
params=rebuild_params)
- @scheduler_api.reroute_compute("revert_resize")
+ @check_vm_state(state_checker.REVERT_RESIZE)
+ @scheduler_api.reroute_compute(state_checker.REVERT_RESIZE)
def revert_resize(self, context, instance):
"""Reverts a resize, deleting the 'new' instance in the process."""
context = context.elevated()
@@ -1223,7 +1219,8 @@ class API(base.Base):
self.db.migration_update(context, migration_ref['id'],
{'status': 'reverted'})
- @scheduler_api.reroute_compute("confirm_resize")
+ @check_vm_state(state_checker.CONFIRM_RESIZE)
+ @scheduler_api.reroute_compute(state_checker.CONFIRM_RESIZE)
def confirm_resize(self, context, instance):
"""Confirms a migration/resize and deletes the 'old' instance."""
context = context.elevated()
@@ -1249,7 +1246,8 @@ class API(base.Base):
self.db.instance_update(context, instance['uuid'],
{'host': migration_ref['dest_compute'], })
- @scheduler_api.reroute_compute("resize")
+ @check_vm_state(state_checker.RESIZE)
+ @scheduler_api.reroute_compute(state_checker.RESIZE)
def resize(self, context, instance, flavor_id=None):
"""Resize (ie, migrate) a running instance.
@@ -1330,7 +1328,8 @@ class API(base.Base):
# didn't raise so this is the correct zone
self.network_api.add_network_to_project(context, project_id)
- @scheduler_api.reroute_compute("pause")
+ @check_vm_state(state_checker.PAUSE)
+ @scheduler_api.reroute_compute(state_checker.PAUSE)
def pause(self, context, instance):
"""Pause the given instance."""
instance_uuid = instance["uuid"]
@@ -1340,7 +1339,8 @@ class API(base.Base):
task_state=task_states.PAUSING)
self._cast_compute_message('pause_instance', context, instance_uuid)
- @scheduler_api.reroute_compute("unpause")
+ @check_vm_state(state_checker.UNPAUSE)
+ @scheduler_api.reroute_compute(state_checker.UNPAUSE)
def unpause(self, context, instance):
"""Unpause the given instance."""
instance_uuid = instance["uuid"]
@@ -1377,7 +1377,8 @@ class API(base.Base):
"""Retrieve actions for the given instance."""
return self.db.instance_get_actions(context, instance['id'])
- @scheduler_api.reroute_compute("suspend")
+ @check_vm_state(state_checker.SUSPEND)
+ @scheduler_api.reroute_compute(state_checker.SUSPEND)
def suspend(self, context, instance):
"""Suspend the given instance."""
instance_uuid = instance["uuid"]
@@ -1387,7 +1388,8 @@ class API(base.Base):
task_state=task_states.SUSPENDING)
self._cast_compute_message('suspend_instance', context, instance_uuid)
- @scheduler_api.reroute_compute("resume")
+ @check_vm_state(state_checker.RESUME)
+ @scheduler_api.reroute_compute(state_checker.RESUME)
def resume(self, context, instance):
"""Resume the given instance."""
instance_uuid = instance["uuid"]
@@ -1397,7 +1399,8 @@ class API(base.Base):
task_state=task_states.RESUMING)
self._cast_compute_message('resume_instance', context, instance_uuid)
- @scheduler_api.reroute_compute("rescue")
+ @check_vm_state(state_checker.RESCUE)
+ @scheduler_api.reroute_compute(state_checker.RESCUE)
def rescue(self, context, instance, rescue_password=None):
"""Rescue the given instance."""
self.update(context,
@@ -1412,7 +1415,8 @@ class API(base.Base):
instance['uuid'],
params=rescue_params)
- @scheduler_api.reroute_compute("unrescue")
+ @check_vm_state(state_checker.UNRESCUE)
+ @scheduler_api.reroute_compute(state_checker.UNRESCUE)
def unrescue(self, context, instance):
"""Unrescue the given instance."""
self.update(context,
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 38929fb33..3e69c425b 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -297,6 +297,34 @@ class ComputeManager(manager.SchedulerDependentManager):
return (swap, ephemerals, block_device_mapping)
+    def _is_instance_terminated(self, instance_uuid):
+        """Return True if the instance is being deleted or cannot be read.
+
+        The instance is looked up by uuid with an admin context. It counts
+        as terminated when its task_state is DELETING, or when the lookup
+        (or reading task_state) raises.
+        """
+        context = nova.context.get_admin_context()
+        try:
+            instance = self.db.instance_get_by_uuid(context, instance_uuid)
+            return instance['task_state'] == task_states.DELETING
+        except Exception:
+            # Best effort: any failure to read the instance is treated as
+            # "terminated". Exception (not a bare except) is caught so that
+            # SystemExit and KeyboardInterrupt still propagate.
+            return True
+
+    def _shutdown_instance_even_if_deleted(self, context, instance_uuid):
+        """Call terminate_instance even for already deleted instances.
+
+        If terminate_instance raises InstanceNotFound, it is retried with
+        the context returned by nova.context.get_admin_context(True).
+        Any other failure is logged and swallowed, so this method does not
+        raise for errors coming from terminate_instance.
+        """
+        LOG.info(_("Going to force the deletion of the vm %(instance_uuid)s, "
+                   "even if it is deleted") % locals())
+        try:
+            try:
+                self.terminate_instance(context, instance_uuid)
+            except exception.InstanceNotFound:
+                LOG.info(_("Instance %(instance_uuid)s did not exist in the "
+                         "DB, but I will shut it down anyway using a special "
+                         "context") % locals())
+                ctxt = nova.context.get_admin_context(True)
+                self.terminate_instance(ctxt, instance_uuid)
+        except Exception:
+            # The message must be formatted with instance_uuid: it is the
+            # only instance identifier among the locals, and a missing key
+            # would raise KeyError from inside this handler.
+            # LOG.exception also records the traceback being swallowed.
+            LOG.exception(_("exception terminating the instance "
+                            "%(instance_uuid)s") % locals())
+
def _run_instance(self, context, instance_uuid,
requested_networks=None,
injected_files=[],
@@ -320,9 +348,14 @@ class ComputeManager(manager.SchedulerDependentManager):
with utils.save_and_reraise_exception():
self._deallocate_network(context, instance)
self._notify_about_instance_usage(instance)
+ if self._is_instance_terminated(instance_uuid):
+ raise exception.InstanceNotFound
except exception.InstanceNotFound:
LOG.exception(_("Instance %s not found.") % instance_uuid)
- return # assuming the instance was already deleted
+ # assuming the instance was already deleted, run "delete" again
+ # just in case
+ self._shutdown_instance_even_if_deleted(context, instance_uuid)
+ return
except Exception as e:
with utils.save_and_reraise_exception():
self._instance_update(context, instance_uuid,
diff --git a/nova/compute/state_checker.py b/nova/compute/state_checker.py
new file mode 100644
index 000000000..9dcdebe8c
--- /dev/null
+++ b/nova/compute/state_checker.py
@@ -0,0 +1,137 @@
+# vim: tabstop=4 shiftwidth=4 softtabstop=4
+
+# Copyright 2010 United States Government as represented by the
+# Administrator of the National Aeronautics and Space Administration.
+# Copyright 2011 Justin Santa Barbara
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from nova import db
+from nova.compute import task_states as ts
+from nova.compute import vm_states as vm
+from nova import context as ctxt
+
+# Names of the API methods whose calls go through the state check.
+REBOOT = 'reboot'
+START = 'start'
+REBUILD = 'rebuild'
+STOP = 'stop'
+PAUSE = 'pause'
+BACKUP = 'backup'
+UNPAUSE = 'unpause'
+SUSPEND = 'suspend'
+RESUME = 'resume'
+RESCUE = 'rescue'
+UNRESCUE = 'unrescue'
+SNAPSHOT = 'snapshot'
+RESIZE = 'resize'
+CONFIRM_RESIZE = 'confirm_resize'
+REVERT_RESIZE = 'revert_resize'
+DELETE = 'delete'
+SOFT_DELETE = 'soft_delete'
+FORCE_DELETE = 'force_delete'
+RESTORE = 'restore'
+
+
+def _states_except(all_states, allowed):
+    """Return a list of the states in all_states that are not in allowed."""
+    return list(set(all_states) - set(allowed))
+
+
+# Computed once at import time and shared by several table entries below.
+all_ts_but_resize_verify = _states_except(ts.get_list(), [ts.RESIZE_VERIFY])
+all_vm_but_act_resc = _states_except(vm.get_list(), [vm.ACTIVE, vm.RESCUED])
+all_vm_but_active = _states_except(vm.get_list(), [vm.ACTIVE])
+
+# method name -> task_states for which the call is blocked.
+# Every listed method shares the same list object.
+block_for_task_state = dict.fromkeys(
+    [REBOOT, START, REBUILD, PAUSE, STOP, UNPAUSE, SUSPEND, RESUME,
+     RESCUE, UNRESCUE, SNAPSHOT, BACKUP, RESIZE, CONFIRM_RESIZE,
+     REVERT_RESIZE],
+    all_ts_but_resize_verify)
+
+# method name -> vm_states for which the call is blocked.
+block_for_vm_state = {
+    # Blocked unless the vm is active or rescued
+    REBOOT: all_vm_but_act_resc,
+    PAUSE: all_vm_but_act_resc,
+    STOP: all_vm_but_act_resc,
+    SUSPEND: all_vm_but_act_resc,
+    # Blocked unless the vm is active
+    REBUILD: all_vm_but_active,
+    SNAPSHOT: all_vm_but_active,
+    BACKUP: all_vm_but_active,
+    RESIZE: all_vm_but_active,
+    CONFIRM_RESIZE: all_vm_but_active,
+    REVERT_RESIZE: all_vm_but_active,
+    # Methods with their own set of non-blocking vm_states
+    START: _states_except(vm.get_list(), [vm.STOPPED]),
+    UNPAUSE: _states_except(vm.get_list(), [vm.PAUSED]),
+    RESUME: _states_except(vm.get_list(), [vm.SUSPENDED]),
+    RESCUE: _states_except(vm.get_list(), [vm.ACTIVE, vm.STOPPED]),
+    UNRESCUE: _states_except(vm.get_list(), [vm.ACTIVE, vm.RESCUED]),
+}
+
+# method name -> list of dicts; the call is blocked when every key present
+# in one of the dicts ('vm_state', 'power_state', 'task_state') equals the
+# corresponding value of the instance.
+block_for_combination = {
+    CONFIRM_RESIZE: [{'vm_state': vm.ACTIVE, 'task_state': None}],
+    REVERT_RESIZE: [{'vm_state': vm.ACTIVE, 'task_state': None}],
+}
+
+
+def is_blocked(method_name, context, instance_ref):
+    """Tell whether method_name is blocked for the instance's state.
+
+    Returns True when the instance's task_state or vm_state appears in the
+    blocked list for the method, or when one of the method's blocked
+    combinations matches the instance. Returns False otherwise, including
+    for methods that appear in none of the tables.
+
+    The context argument is not used by this function.
+    """
+    blocked_task_states = block_for_task_state.get(method_name, ())
+    blocked_vm_states = block_for_vm_state.get(method_name, ())
+    if (instance_ref['task_state'] in blocked_task_states or
+            instance_ref['vm_state'] in blocked_vm_states):
+        return True
+    if method_name not in block_for_combination:
+        # Not found in any table: the method is allowed
+        return False
+    return _is_combination_blocked(method_name, instance_ref)
+
+
+def _is_combination_blocked(method_name, instance_ref):
+    """Check the instance against block_for_combination[method_name].
+
+    Each entry of the list is a dict that may hold 'power_state',
+    'vm_state' and 'task_state' keys. An entry matches when every key it
+    holds equals the instance's value for that key; a key that is absent
+    from the entry matches any value. Returns True as soon as one entry
+    matches, False when none does.
+    """
+    checked_fields = ('power_state', 'vm_state', 'task_state')
+    for rule in block_for_combination[method_name]:
+        mismatch = any(instance_ref[field] != rule[field]
+                       for field in checked_fields if field in rule)
+        if not mismatch:
+            return True
+    # No entry for the method matched the instance
+    return False
diff --git a/nova/compute/task_states.py b/nova/compute/task_states.py
index c6016b509..3765fc79c 100644
--- a/nova/compute/task_states.py
+++ b/nova/compute/task_states.py
@@ -60,3 +60,14 @@ UNRESCUING = 'unrescuing'
DELETING = 'deleting'
STOPPING = 'stopping'
STARTING = 'starting'
+
+
+def get_list():
+    """Build and return a new list holding the possible task_states."""
+    all_task_states = [
+        SCHEDULING,
+        BLOCK_DEVICE_MAPPING,
+        NETWORKING,
+        SPAWNING,
+        IMAGE_SNAPSHOT,
+        IMAGE_BACKUP,
+        UPDATING_PASSWORD,
+        RESIZE_PREP,
+        RESIZE_MIGRATING,
+        RESIZE_MIGRATED,
+        RESIZE_FINISH,
+        RESIZE_REVERTING,
+        RESIZE_CONFIRMING,
+        RESIZE_VERIFY,
+        REBUILDING,
+        REBOOTING,
+        REBOOTING_HARD,
+        PAUSING,
+        UNPAUSING,
+        SUSPENDING,
+        RESUMING,
+        POWERING_OFF,
+        POWERING_ON,
+        RESCUING,
+        UNRESCUING,
+        DELETING,
+        STOPPING,
+        STARTING,
+    ]
+    return all_task_states
diff --git a/nova/compute/vm_states.py b/nova/compute/vm_states.py
index f219bf7f4..4d3d524c5 100644
--- a/nova/compute/vm_states.py
+++ b/nova/compute/vm_states.py
@@ -38,3 +38,9 @@ MIGRATING = 'migrating'
RESIZING = 'resizing'
ERROR = 'error'
+
+
+def get_list():
+    """Build and return a new list holding the possible vm_states."""
+    all_vm_states = [
+        ACTIVE,
+        BUILDING,
+        REBUILDING,
+        PAUSED,
+        SUSPENDED,
+        RESCUED,
+        DELETED,
+        STOPPED,
+        SOFT_DELETE,
+        MIGRATING,
+        RESIZING,
+        ERROR,
+    ]
+    return all_vm_states