diff options
| author | Brian Lamar <brian.lamar@rackspace.com> | 2011-08-17 16:23:40 -0400 |
|---|---|---|
| committer | Brian Lamar <brian.lamar@rackspace.com> | 2011-08-17 16:23:40 -0400 |
| commit | 1d1d027554d6be355bd9b52b2d87081d06f05045 (patch) | |
| tree | 5aab070465f439eef7a0b147abc09e8368dffee2 | |
| parent | bd2e98c064b7c1e9c866f3013e13af7883e11e05 (diff) | |
Updated compute manager/API to use vm/task states.
Updated vm/task states to cover a few more cases I encountered.
| -rw-r--r-- | nova/compute/api.py | 57 | ||||
| -rw-r--r-- | nova/compute/manager.py | 441 | ||||
| -rw-r--r-- | nova/compute/task_state.py | 17 | ||||
| -rw-r--r-- | nova/compute/vm_state.py | 8 |
4 files changed, 296 insertions, 227 deletions
diff --git a/nova/compute/api.py b/nova/compute/api.py index e909e9959..ec760853e 100644 --- a/nova/compute/api.py +++ b/nova/compute/api.py @@ -36,6 +36,7 @@ from nova import utils from nova import volume from nova.compute import instance_types from nova.compute import power_state +from nova.compute import vm_state from nova.compute.utils import terminate_volumes from nova.scheduler import api as scheduler_api from nova.db import base @@ -74,10 +75,13 @@ def generate_default_hostname(instance): def _is_able_to_shutdown(instance, instance_id): - states = {'terminating': "Instance %s is already being terminated", - 'migrating': "Instance %s is being migrated", - 'stopping': "Instance %s is being stopped"} - msg = states.get(instance['state_description']) + states = { + vm_state.DELETE: "Instance %s is already being terminated", + vm_state.MIGRATE: "Instance %s is being migrated", + vm_state.RESIZE: "Instance %s is being resized", + vm_state.STOP: "Instance %s is being stopped", + } + msg = states.get(instance['vm_state']) if msg: LOG.warning(_(msg), instance_id) return False @@ -231,8 +235,8 @@ class API(base.Base): 'image_ref': image_href, 'kernel_id': kernel_id or '', 'ramdisk_id': ramdisk_id or '', - 'state': 0, - 'state_description': 'scheduling', + 'power_state': power_state.NOSTATE, + 'vm_state': vm_state.BUILD, 'user_id': context.user_id, 'project_id': context.project_id, 'launch_time': time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime()), @@ -648,9 +652,8 @@ class API(base.Base): return self.update(context, - instance['id'], - state_description='terminating', - state=0, + instance_id, + vm_state=vm_state.DELETE, terminated_at=utils.utcnow()) host = instance['host'] @@ -671,9 +674,8 @@ class API(base.Base): return self.update(context, - instance['id'], - state_description='stopping', - state=power_state.NOSTATE, + instance_id, + vm_state=vm_state.STOP, terminated_at=utils.utcnow()) host = instance['host'] @@ -685,12 +687,15 @@ class API(base.Base): """Start an instance.""" LOG.debug(_("Going to try to start %s"), instance_id) instance = self._get_instance(context, instance_id, 'starting') - if instance['state_description'] != 'stopped': - _state_description = instance['state_description'] + vm_state = instance["vm_state"] + + if vm_state != vm_state.STOP: LOG.warning(_("Instance %(instance_id)s is not " - "stopped(%(_state_description)s)") % locals()) + "stopped. (%(vm_state)s)") % locals()) return + self.update(context, instance_id, vm_state=vm_state.ACTIVE) + # TODO(yamahata): injected_files isn't supported right now. # It is used only for osapi. not for ec2 api. # availability_zone isn't used by run_instance. @@ -918,6 +923,7 @@ class API(base.Base): @scheduler_api.reroute_compute("reboot") def reboot(self, context, instance_id): """Reboot the given instance.""" + self.update(context, instance_id, vm_state=vm_state.REBOOT) self._cast_compute_message('reboot_instance', context, instance_id) @scheduler_api.reroute_compute("rebuild") @@ -925,8 +931,12 @@ class API(base.Base): metadata=None, files_to_inject=None): """Rebuild the given instance with the provided metadata.""" instance = db.api.instance_get(context, instance_id) + invalid_rebuild_states = [ + vm_state.BUILD, + vm_state.REBUILD, + ] - if instance["state"] == power_state.BUILDING: + if instance["vm_state"] in invalid_rebuild_states: msg = _("Instance already building") raise exception.BuildInProgress(msg) @@ -946,6 +956,8 @@ class API(base.Base): "injected_files": files_to_inject, } + self.update(context, instance_id, vm_state=vm_state.REBUILD) + self._cast_compute_message('rebuild_instance', context, instance_id, @@ -963,6 +975,8 @@ class API(base.Base): raise exception.MigrationNotFoundByStatus(instance_id=instance_id, status='finished') + self.update(context, instance_id, vm_state=vm_state.ACTIVE) + params = {'migration_id': migration_ref['id']} self._cast_compute_message('revert_resize', context, instance_ref['uuid'], @@ -983,6 +997,9 @@ class API(base.Base): if not migration_ref: raise exception.MigrationNotFoundByStatus(instance_id=instance_id, status='finished') + + self.update(context, instance_id, vm_state=vm_state.ACTIVE) + params = {'migration_id': migration_ref['id']} self._cast_compute_message('confirm_resize', context, instance_ref['uuid'], @@ -1028,6 +1045,8 @@ class API(base.Base): if (current_memory_mb == new_memory_mb) and flavor_id: raise exception.CannotResizeToSameSize() + self.update(context, instance_id, vm_state=vm_state.RESIZE) + instance_ref = self._get_instance(context, instance_id, 'resize') self._cast_scheduler_message(context, {"method": "prep_resize", @@ -1061,11 +1080,13 @@ class API(base.Base): @scheduler_api.reroute_compute("pause") def pause(self, context, instance_id): """Pause the given instance.""" + self.update(context, instance_id, vm_state=vm_state.PAUSE) self._cast_compute_message('pause_instance', context, instance_id) @scheduler_api.reroute_compute("unpause") def unpause(self, context, instance_id): """Unpause the given instance.""" + self.update(context, instance_id, vm_state=vm_state.ACTIVE) self._cast_compute_message('unpause_instance', context, instance_id) def set_host_enabled(self, context, host, enabled): @@ -1092,21 +1113,25 @@ class API(base.Base): @scheduler_api.reroute_compute("suspend") def suspend(self, context, instance_id): """Suspend the given instance.""" + self.update(context, instance_id, vm_state=vm_state.SUSPEND) self._cast_compute_message('suspend_instance', context, instance_id) @scheduler_api.reroute_compute("resume") def resume(self, context, instance_id): """Resume the given instance.""" + self.update(context, instance_id, vm_state=vm_state.ACTIVE) self._cast_compute_message('resume_instance', context, instance_id) @scheduler_api.reroute_compute("rescue") def rescue(self, context, instance_id): """Rescue the given instance.""" + self.update(context, instance_id, vm_state=vm_state.RESCUE) self._cast_compute_message('rescue_instance', context, instance_id) @scheduler_api.reroute_compute("unrescue") def unrescue(self, context, instance_id): """Unrescue the given instance.""" + self.update(context, instance_id, vm_state=vm_state.ACTIVE) self._cast_compute_message('unrescue_instance', context, instance_id) @scheduler_api.reroute_compute("set_admin_password") diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 3299268f2..34c6bc1ea 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -56,6 +56,8 @@ from nova import rpc from nova import utils from nova import volume from nova.compute import power_state +from nova.compute import task_state +from nova.compute import vm_state from nova.notifier import api as notifier from nova.compute.utils import terminate_volumes from nova.virt import driver @@ -146,6 +148,10 @@ class ComputeManager(manager.SchedulerDependentManager): super(ComputeManager, self).__init__(service_name="compute", *args, **kwargs) + def _instance_update(self, context, instance_id, **kwargs): + """Update an instance in the database using kwargs as value.""" + return self.db.instance_update(context, instance_id, kwargs) + def init_host(self): """Initialization for a standalone compute service.""" self.driver.init_host(host=self.host) @@ -153,8 +159,8 @@ class ComputeManager(manager.SchedulerDependentManager): instances = self.db.instance_get_all_by_host(context, self.host) for instance in instances: inst_name = instance['name'] - db_state = instance['state'] - drv_state = self._update_state(context, instance['id']) + db_state = instance['power_state'] + drv_state = self._get_power_state(context, instance) expect_running = db_state == power_state.RUNNING \ and drv_state != db_state @@ -177,34 +183,13 @@ class ComputeManager(manager.SchedulerDependentManager): LOG.warning(_('Hypervisor driver does not ' 'support firewall rules')) - def _update_state(self, context, instance_id, state=None): - """Update the state of an instance from the driver info.""" - instance_ref = self.db.instance_get(context, instance_id) - - if state is None: - try: - LOG.debug(_('Checking state of %s'), instance_ref['name']) - info = self.driver.get_info(instance_ref['name']) - except exception.NotFound: - info = None - - if info is not None: - state = info['state'] - else: - state = power_state.FAILED - - self.db.instance_set_state(context, instance_id, state) - return state - - def _update_launched_at(self, context, instance_id, launched_at=None): - """Update the launched_at parameter of the given instance.""" - data = {'launched_at': launched_at or utils.utcnow()} - self.db.instance_update(context, instance_id, data) - - def _update_image_ref(self, context, instance_id, image_ref): - """Update the image_id for the given instance.""" - data = {'image_ref': image_ref} - self.db.instance_update(context, instance_id, data) + def _get_power_state(self, context, instance): + """Retrieve the power state for the given instance.""" + LOG.debug(_('Checking state of %s'), instance['name']) + try: + return self.driver.get_info(instance['name'])["state"] + except exception.NotFound: + return power_state.FAILED def get_console_topic(self, context, **kwargs): """Retrieves the console host for a project on this host. @@ -388,13 +373,10 @@ class ComputeManager(manager.SchedulerDependentManager): # NOTE(vish): used by virt but not in database updates['injected_files'] = kwargs.get('injected_files', []) updates['admin_pass'] = kwargs.get('admin_password', None) - instance = self.db.instance_update(context, - instance_id, - updates) - self.db.instance_set_state(context, - instance_id, - power_state.NOSTATE, - 'networking') + updates['vm_state'] = vm_state.BUILD + updates['task_state'] = task_state.NETWORKING + + instance = self.db.instance_update(context, instance_id, updates) is_vpn = instance['image_ref'] == str(FLAGS.vpn_image_id) try: @@ -413,6 +395,11 @@ class ComputeManager(manager.SchedulerDependentManager): # all vif creation and network injection, maybe this is correct network_info = [] + self._instance_update(context, + instance_id, + vm_state=vm_state.BUILD, + task_state=task_state.BLOCK_DEVICE_MAPPING) + (swap, ephemerals, block_device_mapping) = self._setup_block_device_mapping( context, instance_id) @@ -422,9 +409,11 @@ class ComputeManager(manager.SchedulerDependentManager): 'ephemerals': ephemerals, 'block_device_mapping': block_device_mapping} - # TODO(vish) check to make sure the availability zone matches - self._update_state(context, instance_id, power_state.BUILDING) + self._instance_update(context, + instance_id, + task_state=task_state.SPAWN) + # TODO(vish) check to make sure the availability zone matches try: self.driver.spawn(context, instance, network_info, block_device_info) @@ -433,13 +422,21 @@ class ComputeManager(manager.SchedulerDependentManager): "virtualization enabled in the BIOS? Details: " "%(ex)s") % locals() LOG.exception(msg) + return + + current_power_state = self._get_power_state(context, instance) + self._instance_update(context, + instance_id, + power_state=current_power_state, + vm_state=vm_state.ACTIVE, + task_state=None, + launched_at=utils.utcnow()) - self._update_launched_at(context, instance_id) - self._update_state(context, instance_id) usage_info = utils.usage_from_instance(instance) notifier.notify('compute.%s' % self.host, 'compute.instance.create', notifier.INFO, usage_info) + except exception.InstanceNotFound: # FIXME(wwolf): We are just ignoring InstanceNotFound # exceptions here in case the instance was immediately @@ -523,11 +520,22 @@ class ComputeManager(manager.SchedulerDependentManager): instance_ref = self.db.instance_get(context, instance_id) LOG.audit(_("Rebuilding instance %s"), instance_id, context=context) - self._update_state(context, instance_id, power_state.BUILDING) + current_power_state = self._get_power_state(context, instance_ref) + self._instance_update(context, + instance_id, + power_state=current_power_state, + vm_state=vm_state.REBUILD, + task_state=task_state.REBUILDING) network_info = self._get_instance_nw_info(context, instance_ref) - self.driver.destroy(instance_ref, network_info) + + self._instance_update(context, + instance_id, + power_state=current_power_state, + vm_state=vm_state.REBUILD, + task_state=task_state.SPAWN) + image_ref = kwargs.get('image_ref') instance_ref.image_ref = image_ref instance_ref.injected_files = kwargs.get('injected_files', []) @@ -536,9 +544,15 @@ class ComputeManager(manager.SchedulerDependentManager): bd_mapping = self._setup_block_device_mapping(context, instance_id) self.driver.spawn(context, instance_ref, network_info, bd_mapping) - self._update_image_ref(context, instance_id, image_ref) - self._update_launched_at(context, instance_id) - self._update_state(context, instance_id) + current_power_state = self._get_power_state(context, instance_ref) + self._instance_update(context, + instance_id, + power_state=current_power_state, + vm_state=vm_state.ACTIVE, + task_state=None, + image_ref=image_ref, + launched_at=utils.utcnow()) + usage_info = utils.usage_from_instance(instance_ref, image_ref=image_ref) notifier.notify('compute.%s' % self.host, @@ -550,26 +564,34 @@ class ComputeManager(manager.SchedulerDependentManager): @checks_instance_lock def reboot_instance(self, context, instance_id): """Reboot an instance on this host.""" + LOG.audit(_("Rebooting instance %s"), instance_id, context=context) context = context.elevated() - self._update_state(context, instance_id) instance_ref = self.db.instance_get(context, instance_id) - LOG.audit(_("Rebooting instance %s"), instance_id, context=context) - if instance_ref['state'] != power_state.RUNNING: - state = instance_ref['state'] + current_power_state = self._get_power_state(context, instance_ref) + self._instance_update(context, + instance_id, + power_state=current_power_state, + vm_state=vm_state.REBOOT, + task_state=task_state.REBOOTING) + + if instance_ref['power_state'] != power_state.RUNNING: + state = instance_ref['power_state'] running = power_state.RUNNING LOG.warn(_('trying to reboot a non-running ' 'instance: %(instance_id)s (state: %(state)s ' 'expected: %(running)s)') % locals(), context=context) - self.db.instance_set_state(context, - instance_id, - power_state.NOSTATE, - 'rebooting') network_info = self._get_instance_nw_info(context, instance_ref) self.driver.reboot(instance_ref, network_info) - self._update_state(context, instance_id) + + current_power_state = self._get_power_state(context, instance_ref) + self._instance_update(context, + instance_id, + power_state=current_power_state, + vm_state=vm_state.ACTIVE, + task_state=None) @exception.wrap_exception(notifier=notifier, publisher_id=publisher_id()) def snapshot_instance(self, context, instance_id, image_id, @@ -585,37 +607,41 @@ class ComputeManager(manager.SchedulerDependentManager): :param rotation: int representing how many backups to keep around; None if rotation shouldn't be used (as in the case of snapshots) """ + if image_type != "snapshot" and image_type != "backup": + raise Exception(_('Image type not recognized %s') % image_type) + context = context.elevated() instance_ref = self.db.instance_get(context, instance_id) - #NOTE(sirp): update_state currently only refreshes the state field - # if we add is_snapshotting, we will need this refreshed too, - # potentially? - self._update_state(context, instance_id) + current_power_state = self._get_power_state(context, instance_ref) + self._instance_update(context, + instance_id, + power_state=current_power_state, + vm_state=vm_state.ACTIVE, + task_state=image_type) LOG.audit(_('instance %s: snapshotting'), instance_id, context=context) - if instance_ref['state'] != power_state.RUNNING: - state = instance_ref['state'] + + if instance_ref['power_state'] != power_state.RUNNING: + state = instance_ref['power_state'] running = power_state.RUNNING LOG.warn(_('trying to snapshot a non-running ' 'instance: %(instance_id)s (state: %(state)s ' 'expected: %(running)s)') % locals()) self.driver.snapshot(context, instance_ref, image_id) + self._instance_update(context, instance_id, task_state=None) + + if image_type == 'snapshot' and rotation: + raise exception.ImageRotationNotAllowed() + + elif image_type == 'backup' and rotation: + instance_uuid = instance_ref['uuid'] + self.rotate_backups(context, instance_uuid, backup_type, rotation) - if image_type == 'snapshot': - if rotation: - raise exception.ImageRotationNotAllowed() elif image_type == 'backup': - if rotation: - instance_uuid = instance_ref['uuid'] - self.rotate_backups(context, instance_uuid, backup_type, - rotation) - else: - raise exception.RotationRequiredForBackup() - else: - raise Exception(_('Image type not recognized %s') % image_type) + raise exception.RotationRequiredForBackup() def rotate_backups(self, context, instance_uuid, backup_type, rotation): """Delete excess backups associated to an instance. @@ -751,40 +777,51 @@ class ComputeManager(manager.SchedulerDependentManager): @checks_instance_lock def rescue_instance(self, context, instance_id): """Rescue an instance on this host.""" + LOG.audit(_('instance %s: rescuing'), instance_id, context=context) context = context.elevated() + + self._instance_update(context, + instance_id, + vm_state=vm_state.RESCUE, + task_state=task_state.RESCUING) + instance_ref = self.db.instance_get(context, instance_id) - LOG.audit(_('instance %s: rescuing'), instance_id, context=context) - self.db.instance_set_state(context, - instance_id, - power_state.NOSTATE, - 'rescuing') - _update_state = lambda result: self._update_state_callback( - self, context, instance_id, result) network_info = self._get_instance_nw_info(context, instance_ref) - self.driver.rescue(context, instance_ref, _update_state, network_info) - self._update_state(context, instance_id) + + # NOTE(blamar): None of the virt drivers use the 'callback' param + self.driver.rescue(context, instance_ref, None, network_info) + + current_power_state = self._get_power_state(context, instance_ref) + self._instance_update(context, + instance_id, + vm_state=vm_state.RESCUE, + task_state=task_state.RESCUED, + power_state=current_power_state) @exception.wrap_exception(notifier=notifier, publisher_id=publisher_id()) @checks_instance_lock def unrescue_instance(self, context, instance_id): """Rescue an instance on this host.""" + LOG.audit(_('instance %s: unrescuing'), instance_id, context=context) context = context.elevated() + + self._instance_update(context, + instance_id, + vm_state=vm_state.ACTIVE, + task_state=task_state.UNRESCUING) + instance_ref = self.db.instance_get(context, instance_id) - LOG.audit(_('instance %s: unrescuing'), instance_id, context=context) - self.db.instance_set_state(context, - instance_id, - power_state.NOSTATE, - 'unrescuing') - _update_state = lambda result: self._update_state_callback( - self, context, instance_id, result) network_info = self._get_instance_nw_info(context, instance_ref) - self.driver.unrescue(instance_ref, _update_state, network_info) - self._update_state(context, instance_id) - @staticmethod - def _update_state_callback(self, context, instance_id, result): - """Update instance state when async task completes.""" - self._update_state(context, instance_id) + # NOTE(blamar): None of the virt drivers use the 'callback' param + self.driver.unrescue(instance_ref, None, network_info) + + current_power_state = self._get_power_state(context, instance_ref) + self._instance_update(context, + instance_id, + vm_state=vm_state.ACTIVE, + task_state=None, + power_state=current_power_state) @exception.wrap_exception(notifier=notifier, publisher_id=publisher_id()) @checks_instance_lock @@ -843,11 +880,12 @@ class ComputeManager(manager.SchedulerDependentManager): # Just roll back the record. There's no need to resize down since # the 'old' VM already has the preferred attributes - self.db.instance_update(context, instance_ref['uuid'], - dict(memory_mb=instance_type['memory_mb'], - vcpus=instance_type['vcpus'], - local_gb=instance_type['local_gb'], - instance_type_id=instance_type['id'])) + self._instance_update(context, + instance_ref["uuid"], + memory_mb=instance_type['memory_mb'], + vcpus=instance_type['vcpus'], + local_gb=instance_type['local_gb'], + instance_type_id=instance_type['id']) self.driver.revert_migration(instance_ref) self.db.migration_update(context, migration_id, @@ -1000,35 +1038,45 @@ class ComputeManager(manager.SchedulerDependentManager): @checks_instance_lock def pause_instance(self, context, instance_id): """Pause an instance on this host.""" + LOG.audit(_('instance %s: pausing'), instance_id, context=context) context = context.elevated() + + self._instance_update(context, + instance_id, + vm_state=vm_state.PAUSE, + task_state=task_state.PAUSING) + instance_ref = self.db.instance_get(context, instance_id) - LOG.audit(_('instance %s: pausing'), instance_id, context=context) - self.db.instance_set_state(context, - instance_id, - power_state.NOSTATE, - 'pausing') - self.driver.pause(instance_ref, - lambda result: self._update_state_callback(self, - context, - instance_id, - result)) + self.driver.pause(instance_ref, lambda result: None) + + current_power_state = self._get_power_state(context, instance_ref) + self._instance_update(context, + instance_id, + power_state=current_power_state, + vm_state=vm_state.PAUSE, + task_state=None) @exception.wrap_exception(notifier=notifier, publisher_id=publisher_id()) @checks_instance_lock def unpause_instance(self, context, instance_id): """Unpause a paused instance on this host.""" + LOG.audit(_('instance %s: unpausing'), instance_id, context=context) context = context.elevated() + + self._instance_update(context, + instance_id, + vm_state=vm_state.ACTIVE, + task_state=task_state.UNPAUSING) + instance_ref = self.db.instance_get(context, instance_id) - LOG.audit(_('instance %s: unpausing'), instance_id, context=context) - self.db.instance_set_state(context, - instance_id, - power_state.NOSTATE, - 'unpausing') - self.driver.unpause(instance_ref, - lambda result: self._update_state_callback(self, - context, - instance_id, - result)) + self.driver.unpause(instance_ref, lambda result: None) + + current_power_state = self._get_power_state(context, instance_ref) + self._instance_update(context, + instance_id, + power_state=current_power_state, + vm_state=vm_state.ACTIVE, + task_state=None) @exception.wrap_exception(notifier=notifier, publisher_id=publisher_id()) def host_power_action(self, context, host=None, action=None): @@ -1053,33 +1101,45 @@ class ComputeManager(manager.SchedulerDependentManager): @checks_instance_lock def suspend_instance(self, context, instance_id): """Suspend the given instance.""" + LOG.audit(_('instance %s: suspending'), instance_id, context=context) context = context.elevated() + + self._instance_update(context, + instance_id, + vm_state=vm_state.SUSPEND, + task_state=task_state.SUSPENDING) + instance_ref = self.db.instance_get(context, instance_id) - LOG.audit(_('instance %s: suspending'), instance_id, context=context) - self.db.instance_set_state(context, instance_id, - power_state.NOSTATE, - 'suspending') - self.driver.suspend(instance_ref, - lambda result: self._update_state_callback(self, - context, - instance_id, - result)) + self.driver.suspend(instance_ref, lambda result: None) + + current_power_state = self._get_power_state(context, instance_ref) + self._instance_update(context, + instance_id, + power_state=current_power_state, + vm_state=vm_state.SUSPEND, + task_state=None) @exception.wrap_exception(notifier=notifier, publisher_id=publisher_id()) @checks_instance_lock def resume_instance(self, context, instance_id): """Resume the given suspended instance.""" + LOG.audit(_('instance %s: resuming'), instance_id, context=context) context = context.elevated() + + self._instance_update(context, + instance_id, + vm_state=vm_state.ACTIVE, + task_state=task_state.RESUMING) + instance_ref = self.db.instance_get(context, instance_id) - LOG.audit(_('instance %s: resuming'), instance_id, context=context) - self.db.instance_set_state(context, instance_id, - power_state.NOSTATE, - 'resuming') - self.driver.resume(instance_ref, - lambda result: self._update_state_callback(self, - context, - instance_id, - result)) + self.driver.resume(instance_ref, lambda result: None) + + current_power_state = self._get_power_state(context, instance_ref) + self._instance_update(context, + instance_id, + power_state=current_power_state, + vm_state=vm_state.ACTIVE, + task_state=None) @exception.wrap_exception(notifier=notifier, publisher_id=publisher_id()) def lock_instance(self, context, instance_id): @@ -1489,11 +1549,14 @@ class ComputeManager(manager.SchedulerDependentManager): 'block_migration': block_migration}}) # Restore instance state - self.db.instance_update(ctxt, - instance_ref['id'], - {'state_description': 'running', - 'state': power_state.RUNNING, - 'host': dest}) + current_power_state = self._get_power_state(ctxt, instance_ref) + self._instance_update(ctxt, + instance_ref["id"], + host=dest, + power_state=current_power_state, + vm_state=vm_state.ACTIVE, + task_state=None) + # Restore volume state for volume_ref in instance_ref['volumes']: volume_id = volume_ref['id'] @@ -1539,11 +1602,11 @@ class ComputeManager(manager.SchedulerDependentManager): This param specifies destination host. """ host = instance_ref['host'] - self.db.instance_update(context, - instance_ref['id'], - {'state_description': 'running', - 'state': power_state.RUNNING, - 'host': host}) + self._instance_update(context, + instance_ref['id'], + host=host, + vm_state=vm_state.ACTIVE, + task_state=None) for volume_ref in instance_ref['volumes']: volume_id = volume_ref['id'] @@ -1591,10 +1654,9 @@ class ComputeManager(manager.SchedulerDependentManager): error_list.append(ex) try: - self._poll_instance_states(context) + self._sync_power_states(context) except Exception as ex: - LOG.warning(_("Error during instance poll: %s"), - unicode(ex)) + LOG.warning(_("Error during power_state sync: %s"), unicode(ex)) error_list.append(ex) return error_list @@ -1609,68 +1671,39 @@ class ComputeManager(manager.SchedulerDependentManager): self.update_service_capabilities( self.driver.get_host_stats(refresh=True)) - def _poll_instance_states(self, context): - vm_instances = self.driver.list_instances_detail() - vm_instances = dict((vm.name, vm) for vm in vm_instances) + def _sync_power_states(self, context): + """Align power states between the database and the hypervisor. - # Keep a list of VMs not in the DB, cross them off as we find them - vms_not_found_in_db = list(vm_instances.keys()) + The hypervisor is authoritative for the power_state data, so we + simply loop over all known instances for this host and update the + power_state according to the hypervisor. If the instance is not found + then it will be set to power_state.NOSTATE, because it doesn't exist + on the hypervisor. + """ + vm_instances = self.driver.list_instances_detail() db_instances = self.db.instance_get_all_by_host(context, self.host) + num_vm_instances = len(vm_instances) + num_db_instances = len(db_instances) + + if num_vm_instances != num_db_instances: + LOG.info(_("Found %(num_db_instances)s in the database and " + "%(num_vm_instances)s on the hypervisor.") % locals()) + for db_instance in db_instances: - name = db_instance['name'] - db_state = db_instance['state'] + name = db_instance["name"] + db_power_state = db_instance['power_state'] vm_instance = vm_instances.get(name) if vm_instance is None: - # NOTE(justinsb): We have to be very careful here, because a - # concurrent operation could be in progress (e.g. a spawn) - if db_state == power_state.BUILDING: - # TODO(justinsb): This does mean that if we crash during a - # spawn, the machine will never leave the spawning state, - # but this is just the way nova is; this function isn't - # trying to correct that problem. - # We could have a separate task to correct this error. - # TODO(justinsb): What happens during a live migration? - LOG.info(_("Found instance '%(name)s' in DB but no VM. " - "State=%(db_state)s, so assuming spawn is in " - "progress.") % locals()) - vm_state = db_state - else: - LOG.info(_("Found instance '%(name)s' in DB but no VM. " - "State=%(db_state)s, so setting state to " - "shutoff.") % locals()) - vm_state = power_state.SHUTOFF - if db_instance['state_description'] == 'stopping': - self.db.instance_stop(context, db_instance['id']) - continue + vm_power_state = power_state.NOSTATE else: - vm_state = vm_instance.state - vms_not_found_in_db.remove(name) - - if (db_instance['state_description'] in ['migrating', 'stopping']): - # A situation which db record exists, but no instance" - # sometimes occurs while live-migration at src compute, - # this case should be ignored. - LOG.debug(_("Ignoring %(name)s, as it's currently being " - "migrated.") % locals()) - continue - - if vm_state != db_state: - LOG.info(_("DB/VM state mismatch. Changing state from " - "'%(db_state)s' to '%(vm_state)s'") % locals()) - self._update_state(context, db_instance['id'], vm_state) + vm_power_state = vm_instance["power_state"] - # NOTE(justinsb): We no longer auto-remove SHUTOFF instances - # It's quite hard to get them back when we do. - - # Are there VMs not in the DB? - for vm_not_found_in_db in vms_not_found_in_db: - name = vm_not_found_in_db + if vm_power_state == db_power_state: + continue - # We only care about instances that compute *should* know about - if name.startswith("instance-"): - # TODO(justinsb): What to do here? Adopt it? Shut it down? - LOG.warning(_("Found VM not in DB: '%(name)s'. Ignoring") - % locals()) + self._instance_update(context, + db_instance["id"], + power_state=vm_power_state) diff --git a/nova/compute/task_state.py b/nova/compute/task_state.py index b4dc9af51..55466c783 100644 --- a/nova/compute/task_state.py +++ b/nova/compute/task_state.py @@ -17,12 +17,27 @@ """Possible task states for instances""" -BUILD_BLOCK_DEVICE_MAPPING='block_device_mapping' +BLOCK_DEVICE_MAPPING='block_device_mapping' NETWORKING='networking' +SPAWN='spawn' +SNAPSHOT='snapshot' +BACKUP='backup' PASSWORD='password' RESIZE_PREP='resize_prep' RESIZE_MIGRATING='resize_migrating' RESIZE_MIGRATED='resize_migrated' RESIZE_FINISH='resize_finish' + +REBUILDING='rebuilding' + +REBOOTING='rebooting' +PAUSING='pausing' +UNPAUSING='unpausing' +SUSPENDING='suspending' +RESUMING='resuming' + +RESCUING='rescuing' +RESCUED='rescued' +UNRESCUING='unrescuing' diff --git a/nova/compute/vm_state.py b/nova/compute/vm_state.py index e81cba1f0..a1bca6ef4 100644 --- a/nova/compute/vm_state.py +++ b/nova/compute/vm_state.py @@ -17,19 +17,15 @@ """Possible vm states for instances""" +ACTIVE='active' BUILD='build' REBUILD='rebuild' REBOOT='reboot' DELETE='delete' STOP='stop' -START='start' +MIGRATE='migrate' RESIZE='resize' VERIFY_RESIZE='verify_resize' PAUSE='pause' -UNPAUSE='unpause' - SUSPEND='suspend' -RESUME='resume' - RESCUE='rescue' -UNRESCUE='unrescue' |
