summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBrian Lamar <brian.lamar@rackspace.com>2011-08-17 16:23:40 -0400
committerBrian Lamar <brian.lamar@rackspace.com>2011-08-17 16:23:40 -0400
commit1d1d027554d6be355bd9b52b2d87081d06f05045 (patch)
tree5aab070465f439eef7a0b147abc09e8368dffee2
parentbd2e98c064b7c1e9c866f3013e13af7883e11e05 (diff)
Updated compute manager/API to use vm/task states.
Updated vm/task states to cover a few more cases I encountered.
-rw-r--r--nova/compute/api.py57
-rw-r--r--nova/compute/manager.py441
-rw-r--r--nova/compute/task_state.py17
-rw-r--r--nova/compute/vm_state.py8
4 files changed, 296 insertions, 227 deletions
diff --git a/nova/compute/api.py b/nova/compute/api.py
index e909e9959..ec760853e 100644
--- a/nova/compute/api.py
+++ b/nova/compute/api.py
@@ -36,6 +36,7 @@ from nova import utils
from nova import volume
from nova.compute import instance_types
from nova.compute import power_state
+from nova.compute import vm_state
from nova.compute.utils import terminate_volumes
from nova.scheduler import api as scheduler_api
from nova.db import base
@@ -74,10 +75,13 @@ def generate_default_hostname(instance):
def _is_able_to_shutdown(instance, instance_id):
- states = {'terminating': "Instance %s is already being terminated",
- 'migrating': "Instance %s is being migrated",
- 'stopping': "Instance %s is being stopped"}
- msg = states.get(instance['state_description'])
+ states = {
+ vm_state.DELETE: "Instance %s is already being terminated",
+ vm_state.MIGRATE: "Instance %s is being migrated",
+ vm_state.RESIZE: "Instance %s is being resized",
+ vm_state.STOP: "Instance %s is being stopped",
+ }
+ msg = states.get(instance['vm_state'])
if msg:
LOG.warning(_(msg), instance_id)
return False
@@ -231,8 +235,8 @@ class API(base.Base):
'image_ref': image_href,
'kernel_id': kernel_id or '',
'ramdisk_id': ramdisk_id or '',
- 'state': 0,
- 'state_description': 'scheduling',
+ 'power_state': power_state.NOSTATE,
+ 'vm_state': vm_state.BUILD,
'user_id': context.user_id,
'project_id': context.project_id,
'launch_time': time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime()),
@@ -648,9 +652,8 @@ class API(base.Base):
return
self.update(context,
- instance['id'],
- state_description='terminating',
- state=0,
+ instance_id,
+ vm_state=vm_state.DELETE,
terminated_at=utils.utcnow())
host = instance['host']
@@ -671,9 +674,8 @@ class API(base.Base):
return
self.update(context,
- instance['id'],
- state_description='stopping',
- state=power_state.NOSTATE,
+ instance_id,
+ vm_state=vm_state.STOP,
terminated_at=utils.utcnow())
host = instance['host']
@@ -685,12 +687,15 @@ class API(base.Base):
"""Start an instance."""
LOG.debug(_("Going to try to start %s"), instance_id)
instance = self._get_instance(context, instance_id, 'starting')
- if instance['state_description'] != 'stopped':
- _state_description = instance['state_description']
+ vm_state = instance["vm_state"]
+
+ if vm_state != vm_state.STOP:
LOG.warning(_("Instance %(instance_id)s is not "
- "stopped(%(_state_description)s)") % locals())
+ "stopped. (%(vm_state)s)") % locals())
return
+ self.update(context, instance_id, vm_state=vm_state.ACTIVE)
+
# TODO(yamahata): injected_files isn't supported right now.
# It is used only for osapi. not for ec2 api.
# availability_zone isn't used by run_instance.
@@ -918,6 +923,7 @@ class API(base.Base):
@scheduler_api.reroute_compute("reboot")
def reboot(self, context, instance_id):
"""Reboot the given instance."""
+ self.update(context, instance_id, vm_state=vm_state.REBOOT)
self._cast_compute_message('reboot_instance', context, instance_id)
@scheduler_api.reroute_compute("rebuild")
@@ -925,8 +931,12 @@ class API(base.Base):
metadata=None, files_to_inject=None):
"""Rebuild the given instance with the provided metadata."""
instance = db.api.instance_get(context, instance_id)
+ invalid_rebuild_states = [
+ vm_state.BUILD,
+ vm_state.REBUILD,
+ ]
- if instance["state"] == power_state.BUILDING:
+ if instance["vm_state"] in invalid_rebuild_states:
msg = _("Instance already building")
raise exception.BuildInProgress(msg)
@@ -946,6 +956,8 @@ class API(base.Base):
"injected_files": files_to_inject,
}
+ self.update(context, instance_id, vm_state=vm_state.REBUILD)
+
self._cast_compute_message('rebuild_instance',
context,
instance_id,
@@ -963,6 +975,8 @@ class API(base.Base):
raise exception.MigrationNotFoundByStatus(instance_id=instance_id,
status='finished')
+ self.update(context, instance_id, vm_state=vm_state.ACTIVE)
+
params = {'migration_id': migration_ref['id']}
self._cast_compute_message('revert_resize', context,
instance_ref['uuid'],
@@ -983,6 +997,9 @@ class API(base.Base):
if not migration_ref:
raise exception.MigrationNotFoundByStatus(instance_id=instance_id,
status='finished')
+
+ self.update(context, instance_id, vm_state=vm_state.ACTIVE)
+
params = {'migration_id': migration_ref['id']}
self._cast_compute_message('confirm_resize', context,
instance_ref['uuid'],
@@ -1028,6 +1045,8 @@ class API(base.Base):
if (current_memory_mb == new_memory_mb) and flavor_id:
raise exception.CannotResizeToSameSize()
+ self.update(context, instance_id, vm_state=vm_state.RESIZE)
+
instance_ref = self._get_instance(context, instance_id, 'resize')
self._cast_scheduler_message(context,
{"method": "prep_resize",
@@ -1061,11 +1080,13 @@ class API(base.Base):
@scheduler_api.reroute_compute("pause")
def pause(self, context, instance_id):
"""Pause the given instance."""
+ self.update(context, instance_id, vm_state=vm_state.PAUSE)
self._cast_compute_message('pause_instance', context, instance_id)
@scheduler_api.reroute_compute("unpause")
def unpause(self, context, instance_id):
"""Unpause the given instance."""
+ self.update(context, instance_id, vm_state=vm_state.ACTIVE)
self._cast_compute_message('unpause_instance', context, instance_id)
def set_host_enabled(self, context, host, enabled):
@@ -1092,21 +1113,25 @@ class API(base.Base):
@scheduler_api.reroute_compute("suspend")
def suspend(self, context, instance_id):
"""Suspend the given instance."""
+ self.update(context, instance_id, vm_state=vm_state.SUSPEND)
self._cast_compute_message('suspend_instance', context, instance_id)
@scheduler_api.reroute_compute("resume")
def resume(self, context, instance_id):
"""Resume the given instance."""
+ self.update(context, instance_id, vm_state=vm_state.ACTIVE)
self._cast_compute_message('resume_instance', context, instance_id)
@scheduler_api.reroute_compute("rescue")
def rescue(self, context, instance_id):
"""Rescue the given instance."""
+ self.update(context, instance_id, vm_state=vm_state.RESCUE)
self._cast_compute_message('rescue_instance', context, instance_id)
@scheduler_api.reroute_compute("unrescue")
def unrescue(self, context, instance_id):
"""Unrescue the given instance."""
+ self.update(context, instance_id, vm_state=vm_state.ACTIVE)
self._cast_compute_message('unrescue_instance', context, instance_id)
@scheduler_api.reroute_compute("set_admin_password")
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 3299268f2..34c6bc1ea 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -56,6 +56,8 @@ from nova import rpc
from nova import utils
from nova import volume
from nova.compute import power_state
+from nova.compute import task_state
+from nova.compute import vm_state
from nova.notifier import api as notifier
from nova.compute.utils import terminate_volumes
from nova.virt import driver
@@ -146,6 +148,10 @@ class ComputeManager(manager.SchedulerDependentManager):
super(ComputeManager, self).__init__(service_name="compute",
*args, **kwargs)
+ def _instance_update(self, context, instance_id, **kwargs):
+ """Update an instance in the database using kwargs as value."""
+ return self.db.instance_update(context, instance_id, kwargs)
+
def init_host(self):
"""Initialization for a standalone compute service."""
self.driver.init_host(host=self.host)
@@ -153,8 +159,8 @@ class ComputeManager(manager.SchedulerDependentManager):
instances = self.db.instance_get_all_by_host(context, self.host)
for instance in instances:
inst_name = instance['name']
- db_state = instance['state']
- drv_state = self._update_state(context, instance['id'])
+ db_state = instance['power_state']
+ drv_state = self._get_power_state(context, instance)
expect_running = db_state == power_state.RUNNING \
and drv_state != db_state
@@ -177,34 +183,13 @@ class ComputeManager(manager.SchedulerDependentManager):
LOG.warning(_('Hypervisor driver does not '
'support firewall rules'))
- def _update_state(self, context, instance_id, state=None):
- """Update the state of an instance from the driver info."""
- instance_ref = self.db.instance_get(context, instance_id)
-
- if state is None:
- try:
- LOG.debug(_('Checking state of %s'), instance_ref['name'])
- info = self.driver.get_info(instance_ref['name'])
- except exception.NotFound:
- info = None
-
- if info is not None:
- state = info['state']
- else:
- state = power_state.FAILED
-
- self.db.instance_set_state(context, instance_id, state)
- return state
-
- def _update_launched_at(self, context, instance_id, launched_at=None):
- """Update the launched_at parameter of the given instance."""
- data = {'launched_at': launched_at or utils.utcnow()}
- self.db.instance_update(context, instance_id, data)
-
- def _update_image_ref(self, context, instance_id, image_ref):
- """Update the image_id for the given instance."""
- data = {'image_ref': image_ref}
- self.db.instance_update(context, instance_id, data)
+ def _get_power_state(self, context, instance):
+ """Retrieve the power state for the given instance."""
+ LOG.debug(_('Checking state of %s'), instance['name'])
+ try:
+ return self.driver.get_info(instance['name'])["state"]
+ except exception.NotFound:
+ return power_state.FAILED
def get_console_topic(self, context, **kwargs):
"""Retrieves the console host for a project on this host.
@@ -388,13 +373,10 @@ class ComputeManager(manager.SchedulerDependentManager):
# NOTE(vish): used by virt but not in database
updates['injected_files'] = kwargs.get('injected_files', [])
updates['admin_pass'] = kwargs.get('admin_password', None)
- instance = self.db.instance_update(context,
- instance_id,
- updates)
- self.db.instance_set_state(context,
- instance_id,
- power_state.NOSTATE,
- 'networking')
+ updates['vm_state'] = vm_state.BUILD
+ updates['task_state'] = task_state.NETWORKING
+
+ instance = self.db.instance_update(context, instance_id, updates)
is_vpn = instance['image_ref'] == str(FLAGS.vpn_image_id)
try:
@@ -413,6 +395,11 @@ class ComputeManager(manager.SchedulerDependentManager):
# all vif creation and network injection, maybe this is correct
network_info = []
+ self._instance_update(context,
+ instance_id,
+ vm_state=vm_state.BUILD,
+ task_state=task_state.BLOCK_DEVICE_MAPPING)
+
(swap, ephemerals,
block_device_mapping) = self._setup_block_device_mapping(
context, instance_id)
@@ -422,9 +409,11 @@ class ComputeManager(manager.SchedulerDependentManager):
'ephemerals': ephemerals,
'block_device_mapping': block_device_mapping}
- # TODO(vish) check to make sure the availability zone matches
- self._update_state(context, instance_id, power_state.BUILDING)
+ self._instance_update(context,
+ instance_id,
+ task_state=task_state.SPAWN)
+ # TODO(vish) check to make sure the availability zone matches
try:
self.driver.spawn(context, instance,
network_info, block_device_info)
@@ -433,13 +422,21 @@ class ComputeManager(manager.SchedulerDependentManager):
"virtualization enabled in the BIOS? Details: "
"%(ex)s") % locals()
LOG.exception(msg)
+ return
+
+ current_power_state = self._get_power_state(context, instance)
+ self._instance_update(context,
+ instance_id,
+ power_state=current_power_state,
+ vm_state=vm_state.ACTIVE,
+ task_state=None,
+ launched_at=utils.utcnow())
- self._update_launched_at(context, instance_id)
- self._update_state(context, instance_id)
usage_info = utils.usage_from_instance(instance)
notifier.notify('compute.%s' % self.host,
'compute.instance.create',
notifier.INFO, usage_info)
+
except exception.InstanceNotFound:
# FIXME(wwolf): We are just ignoring InstanceNotFound
# exceptions here in case the instance was immediately
@@ -523,11 +520,22 @@ class ComputeManager(manager.SchedulerDependentManager):
instance_ref = self.db.instance_get(context, instance_id)
LOG.audit(_("Rebuilding instance %s"), instance_id, context=context)
- self._update_state(context, instance_id, power_state.BUILDING)
+ current_power_state = self._get_power_state(context, instance_ref)
+ self._instance_update(context,
+ instance_id,
+ power_state=current_power_state,
+ vm_state=vm_state.REBUILD,
+ task_state=task_state.REBUILDING)
network_info = self._get_instance_nw_info(context, instance_ref)
-
self.driver.destroy(instance_ref, network_info)
+
+ self._instance_update(context,
+ instance_id,
+ power_state=current_power_state,
+ vm_state=vm_state.REBUILD,
+ task_state=task_state.SPAWN)
+
image_ref = kwargs.get('image_ref')
instance_ref.image_ref = image_ref
instance_ref.injected_files = kwargs.get('injected_files', [])
@@ -536,9 +544,15 @@ class ComputeManager(manager.SchedulerDependentManager):
bd_mapping = self._setup_block_device_mapping(context, instance_id)
self.driver.spawn(context, instance_ref, network_info, bd_mapping)
- self._update_image_ref(context, instance_id, image_ref)
- self._update_launched_at(context, instance_id)
- self._update_state(context, instance_id)
+ current_power_state = self._get_power_state(context, instance_ref)
+ self._instance_update(context,
+ instance_id,
+ power_state=current_power_state,
+ vm_state=vm_state.ACTIVE,
+ task_state=None,
+ image_ref=image_ref,
+ launched_at=utils.utcnow())
+
usage_info = utils.usage_from_instance(instance_ref,
image_ref=image_ref)
notifier.notify('compute.%s' % self.host,
@@ -550,26 +564,34 @@ class ComputeManager(manager.SchedulerDependentManager):
@checks_instance_lock
def reboot_instance(self, context, instance_id):
"""Reboot an instance on this host."""
+ LOG.audit(_("Rebooting instance %s"), instance_id, context=context)
context = context.elevated()
- self._update_state(context, instance_id)
instance_ref = self.db.instance_get(context, instance_id)
- LOG.audit(_("Rebooting instance %s"), instance_id, context=context)
- if instance_ref['state'] != power_state.RUNNING:
- state = instance_ref['state']
+ current_power_state = self._get_power_state(context, instance_ref)
+ self._instance_update(context,
+ instance_id,
+ power_state=current_power_state,
+ vm_state=vm_state.REBOOT,
+ task_state=task_state.REBOOTING)
+
+ if instance_ref['power_state'] != power_state.RUNNING:
+ state = instance_ref['power_state']
running = power_state.RUNNING
LOG.warn(_('trying to reboot a non-running '
'instance: %(instance_id)s (state: %(state)s '
'expected: %(running)s)') % locals(),
context=context)
- self.db.instance_set_state(context,
- instance_id,
- power_state.NOSTATE,
- 'rebooting')
network_info = self._get_instance_nw_info(context, instance_ref)
self.driver.reboot(instance_ref, network_info)
- self._update_state(context, instance_id)
+
+ current_power_state = self._get_power_state(context, instance_ref)
+ self._instance_update(context,
+ instance_id,
+ power_state=current_power_state,
+ vm_state=vm_state.ACTIVE,
+ task_state=None)
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
def snapshot_instance(self, context, instance_id, image_id,
@@ -585,37 +607,41 @@ class ComputeManager(manager.SchedulerDependentManager):
:param rotation: int representing how many backups to keep around;
None if rotation shouldn't be used (as in the case of snapshots)
"""
+ if image_type != "snapshot" and image_type != "backup":
+ raise Exception(_('Image type not recognized %s') % image_type)
+
context = context.elevated()
instance_ref = self.db.instance_get(context, instance_id)
- #NOTE(sirp): update_state currently only refreshes the state field
- # if we add is_snapshotting, we will need this refreshed too,
- # potentially?
- self._update_state(context, instance_id)
+ current_power_state = self._get_power_state(context, instance_ref)
+ self._instance_update(context,
+ instance_id,
+ power_state=current_power_state,
+ vm_state=vm_state.ACTIVE,
+ task_state=image_type)
LOG.audit(_('instance %s: snapshotting'), instance_id,
context=context)
- if instance_ref['state'] != power_state.RUNNING:
- state = instance_ref['state']
+
+ if instance_ref['power_state'] != power_state.RUNNING:
+ state = instance_ref['power_state']
running = power_state.RUNNING
LOG.warn(_('trying to snapshot a non-running '
'instance: %(instance_id)s (state: %(state)s '
'expected: %(running)s)') % locals())
self.driver.snapshot(context, instance_ref, image_id)
+ self._instance_update(context, instance_id, task_state=None)
+
+ if image_type == 'snapshot' and rotation:
+ raise exception.ImageRotationNotAllowed()
+
+ elif image_type == 'backup' and rotation:
+ instance_uuid = instance_ref['uuid']
+ self.rotate_backups(context, instance_uuid, backup_type, rotation)
- if image_type == 'snapshot':
- if rotation:
- raise exception.ImageRotationNotAllowed()
elif image_type == 'backup':
- if rotation:
- instance_uuid = instance_ref['uuid']
- self.rotate_backups(context, instance_uuid, backup_type,
- rotation)
- else:
- raise exception.RotationRequiredForBackup()
- else:
- raise Exception(_('Image type not recognized %s') % image_type)
+ raise exception.RotationRequiredForBackup()
def rotate_backups(self, context, instance_uuid, backup_type, rotation):
"""Delete excess backups associated to an instance.
@@ -751,40 +777,51 @@ class ComputeManager(manager.SchedulerDependentManager):
@checks_instance_lock
def rescue_instance(self, context, instance_id):
"""Rescue an instance on this host."""
+ LOG.audit(_('instance %s: rescuing'), instance_id, context=context)
context = context.elevated()
+
+ self._instance_update(context,
+ instance_id,
+ vm_state=vm_state.RESCUE,
+ task_state=task_state.RESCUING)
+
instance_ref = self.db.instance_get(context, instance_id)
- LOG.audit(_('instance %s: rescuing'), instance_id, context=context)
- self.db.instance_set_state(context,
- instance_id,
- power_state.NOSTATE,
- 'rescuing')
- _update_state = lambda result: self._update_state_callback(
- self, context, instance_id, result)
network_info = self._get_instance_nw_info(context, instance_ref)
- self.driver.rescue(context, instance_ref, _update_state, network_info)
- self._update_state(context, instance_id)
+
+ # NOTE(blamar): None of the virt drivers use the 'callback' param
+ self.driver.rescue(context, instance_ref, None, network_info)
+
+ current_power_state = self._get_power_state(context, instance_ref)
+ self._instance_update(context,
+ instance_id,
+ vm_state=vm_state.RESCUE,
+ task_state=task_state.RESCUED,
+ power_state=current_power_state)
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@checks_instance_lock
def unrescue_instance(self, context, instance_id):
"""Rescue an instance on this host."""
+ LOG.audit(_('instance %s: unrescuing'), instance_id, context=context)
context = context.elevated()
+
+ self._instance_update(context,
+ instance_id,
+ vm_state=vm_state.ACTIVE,
+ task_state=task_state.UNRESCUING)
+
instance_ref = self.db.instance_get(context, instance_id)
- LOG.audit(_('instance %s: unrescuing'), instance_id, context=context)
- self.db.instance_set_state(context,
- instance_id,
- power_state.NOSTATE,
- 'unrescuing')
- _update_state = lambda result: self._update_state_callback(
- self, context, instance_id, result)
network_info = self._get_instance_nw_info(context, instance_ref)
- self.driver.unrescue(instance_ref, _update_state, network_info)
- self._update_state(context, instance_id)
- @staticmethod
- def _update_state_callback(self, context, instance_id, result):
- """Update instance state when async task completes."""
- self._update_state(context, instance_id)
+ # NOTE(blamar): None of the virt drivers use the 'callback' param
+ self.driver.unrescue(instance_ref, None, network_info)
+
+ current_power_state = self._get_power_state(context, instance_ref)
+ self._instance_update(context,
+ instance_id,
+ vm_state=vm_state.ACTIVE,
+ task_state=None,
+ power_state=current_power_state)
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@checks_instance_lock
@@ -843,11 +880,12 @@ class ComputeManager(manager.SchedulerDependentManager):
# Just roll back the record. There's no need to resize down since
# the 'old' VM already has the preferred attributes
- self.db.instance_update(context, instance_ref['uuid'],
- dict(memory_mb=instance_type['memory_mb'],
- vcpus=instance_type['vcpus'],
- local_gb=instance_type['local_gb'],
- instance_type_id=instance_type['id']))
+ self._instance_update(context,
+ instance_ref["uuid"],
+ memory_mb=instance_type['memory_mb'],
+ vcpus=instance_type['vcpus'],
+ local_gb=instance_type['local_gb'],
+ instance_type_id=instance_type['id'])
self.driver.revert_migration(instance_ref)
self.db.migration_update(context, migration_id,
@@ -1000,35 +1038,45 @@ class ComputeManager(manager.SchedulerDependentManager):
@checks_instance_lock
def pause_instance(self, context, instance_id):
"""Pause an instance on this host."""
+ LOG.audit(_('instance %s: pausing'), instance_id, context=context)
context = context.elevated()
+
+ self._instance_update(context,
+ instance_id,
+ vm_state=vm_state.PAUSE,
+ task_state=task_state.PAUSING)
+
instance_ref = self.db.instance_get(context, instance_id)
- LOG.audit(_('instance %s: pausing'), instance_id, context=context)
- self.db.instance_set_state(context,
- instance_id,
- power_state.NOSTATE,
- 'pausing')
- self.driver.pause(instance_ref,
- lambda result: self._update_state_callback(self,
- context,
- instance_id,
- result))
+ self.driver.pause(instance_ref, lambda result: None)
+
+ current_power_state = self._get_power_state(context, instance_ref)
+ self._instance_update(context,
+ instance_id,
+ power_state=current_power_state,
+ vm_state=vm_state.PAUSE,
+ task_state=None)
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@checks_instance_lock
def unpause_instance(self, context, instance_id):
"""Unpause a paused instance on this host."""
+ LOG.audit(_('instance %s: unpausing'), instance_id, context=context)
context = context.elevated()
+
+ self._instance_update(context,
+ instance_id,
+ vm_state=vm_state.ACTIVE,
+ task_state=task_state.UNPAUSING)
+
instance_ref = self.db.instance_get(context, instance_id)
- LOG.audit(_('instance %s: unpausing'), instance_id, context=context)
- self.db.instance_set_state(context,
- instance_id,
- power_state.NOSTATE,
- 'unpausing')
- self.driver.unpause(instance_ref,
- lambda result: self._update_state_callback(self,
- context,
- instance_id,
- result))
+ self.driver.unpause(instance_ref, lambda result: None)
+
+ current_power_state = self._get_power_state(context, instance_ref)
+ self._instance_update(context,
+ instance_id,
+ power_state=current_power_state,
+ vm_state=vm_state.ACTIVE,
+ task_state=None)
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
def host_power_action(self, context, host=None, action=None):
@@ -1053,33 +1101,45 @@ class ComputeManager(manager.SchedulerDependentManager):
@checks_instance_lock
def suspend_instance(self, context, instance_id):
"""Suspend the given instance."""
+ LOG.audit(_('instance %s: suspending'), instance_id, context=context)
context = context.elevated()
+
+ self._instance_update(context,
+ instance_id,
+ vm_state=vm_state.SUSPEND,
+ task_state=task_state.SUSPENDING)
+
instance_ref = self.db.instance_get(context, instance_id)
- LOG.audit(_('instance %s: suspending'), instance_id, context=context)
- self.db.instance_set_state(context, instance_id,
- power_state.NOSTATE,
- 'suspending')
- self.driver.suspend(instance_ref,
- lambda result: self._update_state_callback(self,
- context,
- instance_id,
- result))
+ self.driver.suspend(instance_ref, lambda result: None)
+
+ current_power_state = self._get_power_state(context, instance_ref)
+ self._instance_update(context,
+ instance_id,
+ power_state=current_power_state,
+ vm_state=vm_state.SUSPEND,
+ task_state=None)
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@checks_instance_lock
def resume_instance(self, context, instance_id):
"""Resume the given suspended instance."""
+ LOG.audit(_('instance %s: resuming'), instance_id, context=context)
context = context.elevated()
+
+ self._instance_update(context,
+ instance_id,
+ vm_state=vm_state.ACTIVE,
+ task_state=task_state.RESUMING)
+
instance_ref = self.db.instance_get(context, instance_id)
- LOG.audit(_('instance %s: resuming'), instance_id, context=context)
- self.db.instance_set_state(context, instance_id,
- power_state.NOSTATE,
- 'resuming')
- self.driver.resume(instance_ref,
- lambda result: self._update_state_callback(self,
- context,
- instance_id,
- result))
+ self.driver.resume(instance_ref, lambda result: None)
+
+ current_power_state = self._get_power_state(context, instance_ref)
+ self._instance_update(context,
+ instance_id,
+ power_state=current_power_state,
+ vm_state=vm_state.ACTIVE,
+ task_state=None)
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
def lock_instance(self, context, instance_id):
@@ -1489,11 +1549,14 @@ class ComputeManager(manager.SchedulerDependentManager):
'block_migration': block_migration}})
# Restore instance state
- self.db.instance_update(ctxt,
- instance_ref['id'],
- {'state_description': 'running',
- 'state': power_state.RUNNING,
- 'host': dest})
+ current_power_state = self._get_power_state(ctxt, instance_ref)
+ self._instance_update(ctxt,
+ instance_ref["id"],
+ host=dest,
+ power_state=current_power_state,
+ vm_state=vm_state.ACTIVE,
+ task_state=None)
+
# Restore volume state
for volume_ref in instance_ref['volumes']:
volume_id = volume_ref['id']
@@ -1539,11 +1602,11 @@ class ComputeManager(manager.SchedulerDependentManager):
This param specifies destination host.
"""
host = instance_ref['host']
- self.db.instance_update(context,
- instance_ref['id'],
- {'state_description': 'running',
- 'state': power_state.RUNNING,
- 'host': host})
+ self._instance_update(context,
+ instance_ref['id'],
+ host=host,
+ vm_state=vm_state.ACTIVE,
+ task_state=None)
for volume_ref in instance_ref['volumes']:
volume_id = volume_ref['id']
@@ -1591,10 +1654,9 @@ class ComputeManager(manager.SchedulerDependentManager):
error_list.append(ex)
try:
- self._poll_instance_states(context)
+ self._sync_power_states(context)
except Exception as ex:
- LOG.warning(_("Error during instance poll: %s"),
- unicode(ex))
+ LOG.warning(_("Error during power_state sync: %s"), unicode(ex))
error_list.append(ex)
return error_list
@@ -1609,68 +1671,39 @@ class ComputeManager(manager.SchedulerDependentManager):
self.update_service_capabilities(
self.driver.get_host_stats(refresh=True))
- def _poll_instance_states(self, context):
- vm_instances = self.driver.list_instances_detail()
- vm_instances = dict((vm.name, vm) for vm in vm_instances)
+ def _sync_power_states(self, context):
+ """Align power states between the database and the hypervisor.
- # Keep a list of VMs not in the DB, cross them off as we find them
- vms_not_found_in_db = list(vm_instances.keys())
+ The hypervisor is authoritative for the power_state data, so we
+ simply loop over all known instances for this host and update the
+ power_state according to the hypervisor. If the instance is not found
+ then it will be set to power_state.NOSTATE, because it doesn't exist
+ on the hypervisor.
+ """
+ vm_instances = self.driver.list_instances_detail()
db_instances = self.db.instance_get_all_by_host(context, self.host)
+ num_vm_instances = len(vm_instances)
+ num_db_instances = len(db_instances)
+
+ if num_vm_instances != num_db_instances:
+ LOG.info(_("Found %(num_db_instances)s in the database and "
+ "%(num_vm_instances)s on the hypervisor.") % locals())
+
for db_instance in db_instances:
- name = db_instance['name']
- db_state = db_instance['state']
+ name = db_instance["name"]
+ db_power_state = db_instance['power_state']
vm_instance = vm_instances.get(name)
if vm_instance is None:
- # NOTE(justinsb): We have to be very careful here, because a
- # concurrent operation could be in progress (e.g. a spawn)
- if db_state == power_state.BUILDING:
- # TODO(justinsb): This does mean that if we crash during a
- # spawn, the machine will never leave the spawning state,
- # but this is just the way nova is; this function isn't
- # trying to correct that problem.
- # We could have a separate task to correct this error.
- # TODO(justinsb): What happens during a live migration?
- LOG.info(_("Found instance '%(name)s' in DB but no VM. "
- "State=%(db_state)s, so assuming spawn is in "
- "progress.") % locals())
- vm_state = db_state
- else:
- LOG.info(_("Found instance '%(name)s' in DB but no VM. "
- "State=%(db_state)s, so setting state to "
- "shutoff.") % locals())
- vm_state = power_state.SHUTOFF
- if db_instance['state_description'] == 'stopping':
- self.db.instance_stop(context, db_instance['id'])
- continue
+ vm_power_state = power_state.NOSTATE
else:
- vm_state = vm_instance.state
- vms_not_found_in_db.remove(name)
-
- if (db_instance['state_description'] in ['migrating', 'stopping']):
- # A situation which db record exists, but no instance"
- # sometimes occurs while live-migration at src compute,
- # this case should be ignored.
- LOG.debug(_("Ignoring %(name)s, as it's currently being "
- "migrated.") % locals())
- continue
-
- if vm_state != db_state:
- LOG.info(_("DB/VM state mismatch. Changing state from "
- "'%(db_state)s' to '%(vm_state)s'") % locals())
- self._update_state(context, db_instance['id'], vm_state)
+ vm_power_state = vm_instance["power_state"]
- # NOTE(justinsb): We no longer auto-remove SHUTOFF instances
- # It's quite hard to get them back when we do.
-
- # Are there VMs not in the DB?
- for vm_not_found_in_db in vms_not_found_in_db:
- name = vm_not_found_in_db
+ if vm_power_state == db_power_state:
+ continue
- # We only care about instances that compute *should* know about
- if name.startswith("instance-"):
- # TODO(justinsb): What to do here? Adopt it? Shut it down?
- LOG.warning(_("Found VM not in DB: '%(name)s'. Ignoring")
- % locals())
+ self._instance_update(context,
+ db_instance["id"],
+ power_state=vm_power_state)
diff --git a/nova/compute/task_state.py b/nova/compute/task_state.py
index b4dc9af51..55466c783 100644
--- a/nova/compute/task_state.py
+++ b/nova/compute/task_state.py
@@ -17,12 +17,27 @@
"""Possible task states for instances"""
-BUILD_BLOCK_DEVICE_MAPPING='block_device_mapping'
+BLOCK_DEVICE_MAPPING='block_device_mapping'
NETWORKING='networking'
+SPAWN='spawn'
+SNAPSHOT='snapshot'
+BACKUP='backup'
PASSWORD='password'
RESIZE_PREP='resize_prep'
RESIZE_MIGRATING='resize_migrating'
RESIZE_MIGRATED='resize_migrated'
RESIZE_FINISH='resize_finish'
+
+REBUILDING='rebuilding'
+
+REBOOTING='rebooting'
+PAUSING='pausing'
+UNPAUSING='unpausing'
+SUSPENDING='suspending'
+RESUMING='resuming'
+
+RESCUING='rescuing'
+RESCUED='rescued'
+UNRESCUING='unrescuing'
diff --git a/nova/compute/vm_state.py b/nova/compute/vm_state.py
index e81cba1f0..a1bca6ef4 100644
--- a/nova/compute/vm_state.py
+++ b/nova/compute/vm_state.py
@@ -17,19 +17,15 @@
"""Possible vm states for instances"""
+ACTIVE='active'
BUILD='build'
REBUILD='rebuild'
REBOOT='reboot'
DELETE='delete'
STOP='stop'
-START='start'
+MIGRATE='migrate'
RESIZE='resize'
VERIFY_RESIZE='verify_resize'
PAUSE='pause'
-UNPAUSE='unpause'
-
SUSPEND='suspend'
-RESUME='resume'
-
RESCUE='rescue'
-UNRESCUE='unrescue'