Updated compute manager/API to use vm/task states.

Updated vm/task states to cover a few more cases I encountered.
author: Brian Lamar <brian.lamar@rackspace.com> 2011-08-17 16:23:40 -0400
committer: Brian Lamar <brian.lamar@rackspace.com> 2011-08-17 16:23:40 -0400
commit: 1d1d027554d6be355bd9b52b2d87081d06f05045 (patch)
tree: 5aab070465f439eef7a0b147abc09e8368dffee2
parent: bd2e98c064b7c1e9c866f3013e13af7883e11e05 (diff)
4 files changed, 296 insertions, 227 deletions
diff --git a/nova/compute/api.py b/nova/compute/api.py
index e909e9959..ec760853e 100644
--- a/nova/compute/api.py
+++ b/nova/compute/api.py
@@ -36,6 +36,7 @@ from nova import utils
 from nova import volume
 from nova.compute import instance_types
 from nova.compute import power_state
+from nova.compute import vm_state
 from nova.compute.utils import terminate_volumes
 from nova.scheduler import api as scheduler_api
 from nova.db import base
@@ -74,10 +75,13 @@ def generate_default_hostname(instance):
 
 
 def _is_able_to_shutdown(instance, instance_id):
-    states = {'terminating': "Instance %s is already being terminated",
-              'migrating': "Instance %s is being migrated",
-              'stopping': "Instance %s is being stopped"}
-    msg = states.get(instance['state_description'])
+    states = {
+        vm_state.DELETE: "Instance %s is already being terminated",
+        vm_state.MIGRATE: "Instance %s is being migrated",
+        vm_state.RESIZE: "Instance %s is being resized",
+        vm_state.STOP: "Instance %s is being stopped",
+    }
+    msg = states.get(instance['vm_state'])
     if msg:
         LOG.warning(_(msg), instance_id)
         return False
@@ -231,8 +235,8 @@ class API(base.Base):
             'image_ref': image_href,
             'kernel_id': kernel_id or '',
             'ramdisk_id': ramdisk_id or '',
-            'state': 0,
-            'state_description': 'scheduling',
+            'power_state': power_state.NOSTATE,
+            'vm_state': vm_state.BUILD,
             'user_id': context.user_id,
             'project_id': context.project_id,
             'launch_time': time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime()),
@@ -648,9 +652,8 @@ class API(base.Base):
             return
 
         self.update(context,
-                    instance['id'],
-                    state_description='terminating',
-                    state=0,
+                    instance_id,
+                    vm_state=vm_state.DELETE,
                     terminated_at=utils.utcnow())
 
         host = instance['host']
@@ -671,9 +674,8 @@ class API(base.Base):
             return
 
         self.update(context,
-                    instance['id'],
-                    state_description='stopping',
-                    state=power_state.NOSTATE,
+                    instance_id,
+                    vm_state=vm_state.STOP,
                     terminated_at=utils.utcnow())
 
         host = instance['host']
@@ -685,12 +687,15 @@ class API(base.Base):
         """Start an instance."""
         LOG.debug(_("Going to try to start %s"), instance_id)
         instance = self._get_instance(context, instance_id, 'starting')
-        if instance['state_description'] != 'stopped':
-            _state_description = instance['state_description']
+        vm_state = instance["vm_state"]
+
+        if vm_state != vm_state.STOP:
             LOG.warning(_("Instance %(instance_id)s is not "
-                          "stopped(%(_state_description)s)") % locals())
+                          "stopped. (%(vm_state)s)") % locals())
             return
 
+        self.update(context, instance_id, vm_state=vm_state.ACTIVE)
+
         # TODO(yamahata): injected_files isn't supported right now.
         #                 It is used only for osapi. not for ec2 api.
         #                 availability_zone isn't used by run_instance.
@@ -918,6 +923,7 @@ class API(base.Base):
     @scheduler_api.reroute_compute("reboot")
     def reboot(self, context, instance_id):
         """Reboot the given instance."""
+        self.update(context, instance_id, vm_state=vm_state.REBOOT)
         self._cast_compute_message('reboot_instance', context, instance_id)
 
     @scheduler_api.reroute_compute("rebuild")
@@ -925,8 +931,12 @@ class API(base.Base):
             metadata=None, files_to_inject=None):
         """Rebuild the given instance with the provided metadata."""
         instance = db.api.instance_get(context, instance_id)
+        invalid_rebuild_states = [
+            vm_state.BUILD,
+            vm_state.REBUILD,
+        ]
 
-        if instance["state"] == power_state.BUILDING:
+        if instance["vm_state"] in invalid_rebuild_states:
             msg = _("Instance already building")
             raise exception.BuildInProgress(msg)
 
@@ -946,6 +956,8 @@ class API(base.Base):
             "injected_files": files_to_inject,
         }
 
+        self.update(context, instance_id, vm_state=vm_state.REBUILD)
+
         self._cast_compute_message('rebuild_instance',
                                    context,
                                    instance_id,
@@ -963,6 +975,8 @@ class API(base.Base):
             raise exception.MigrationNotFoundByStatus(instance_id=instance_id,
                                                       status='finished')
 
+        self.update(context, instance_id, vm_state=vm_state.ACTIVE)
+
         params = {'migration_id': migration_ref['id']}
         self._cast_compute_message('revert_resize', context,
                                    instance_ref['uuid'],
@@ -983,6 +997,9 @@ class API(base.Base):
         if not migration_ref:
             raise exception.MigrationNotFoundByStatus(instance_id=instance_id,
                                                       status='finished')
+
+        self.update(context, instance_id, vm_state=vm_state.ACTIVE)
+
         params = {'migration_id': migration_ref['id']}
         self._cast_compute_message('confirm_resize', context,
                                    instance_ref['uuid'],
@@ -1028,6 +1045,8 @@ class API(base.Base):
         if (current_memory_mb == new_memory_mb) and flavor_id:
             raise exception.CannotResizeToSameSize()
 
+        self.update(context, instance_id, vm_state=vm_state.RESIZE)
+
         instance_ref = self._get_instance(context, instance_id, 'resize')
         self._cast_scheduler_message(context,
                     {"method": "prep_resize",
@@ -1061,11 +1080,13 @@ class API(base.Base):
     @scheduler_api.reroute_compute("pause")
     def pause(self, context, instance_id):
         """Pause the given instance."""
+        self.update(context, instance_id, vm_state=vm_state.PAUSE)
         self._cast_compute_message('pause_instance', context, instance_id)
 
     @scheduler_api.reroute_compute("unpause")
     def unpause(self, context, instance_id):
         """Unpause the given instance."""
+        self.update(context, instance_id, vm_state=vm_state.ACTIVE)
         self._cast_compute_message('unpause_instance', context, instance_id)
 
     def set_host_enabled(self, context, host, enabled):
@@ -1092,21 +1113,25 @@ class API(base.Base):
     @scheduler_api.reroute_compute("suspend")
     def suspend(self, context, instance_id):
         """Suspend the given instance."""
+        self.update(context, instance_id, vm_state=vm_state.SUSPEND)
         self._cast_compute_message('suspend_instance', context, instance_id)
 
     @scheduler_api.reroute_compute("resume")
     def resume(self, context, instance_id):
         """Resume the given instance."""
+        self.update(context, instance_id, vm_state=vm_state.ACTIVE)
         self._cast_compute_message('resume_instance', context, instance_id)
 
     @scheduler_api.reroute_compute("rescue")
     def rescue(self, context, instance_id):
         """Rescue the given instance."""
+        self.update(context, instance_id, vm_state=vm_state.RESCUE)
         self._cast_compute_message('rescue_instance', context, instance_id)
 
     @scheduler_api.reroute_compute("unrescue")
     def unrescue(self, context, instance_id):
         """Unrescue the given instance."""
+        self.update(context, instance_id, vm_state=vm_state.ACTIVE)
         self._cast_compute_message('unrescue_instance', context, instance_id)
 
     @scheduler_api.reroute_compute("set_admin_password")
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 3299268f2..34c6bc1ea 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -56,6 +56,8 @@ from nova import rpc
 from nova import utils
 from nova import volume
 from nova.compute import power_state
+from nova.compute import task_state
+from nova.compute import vm_state
 from nova.notifier import api as notifier
 from nova.compute.utils import terminate_volumes
 from nova.virt import driver
@@ -146,6 +148,10 @@ class ComputeManager(manager.SchedulerDependentManager):
         super(ComputeManager, self).__init__(service_name="compute",
                                              *args, **kwargs)
 
+    def _instance_update(self, context, instance_id, **kwargs):
+        """Update an instance in the database using kwargs as value."""
+        return self.db.instance_update(context, instance_id, kwargs)
+
     def init_host(self):
         """Initialization for a standalone compute service."""
         self.driver.init_host(host=self.host)
@@ -153,8 +159,8 @@ class ComputeManager(manager.SchedulerDependentManager):
         instances = self.db.instance_get_all_by_host(context, self.host)
         for instance in instances:
             inst_name = instance['name']
-            db_state = instance['state']
-            drv_state = self._update_state(context, instance['id'])
+            db_state = instance['power_state']
+            drv_state = self._get_power_state(context, instance)
 
             expect_running = db_state == power_state.RUNNING \
                              and drv_state != db_state
@@ -177,34 +183,13 @@ class ComputeManager(manager.SchedulerDependentManager):
                     LOG.warning(_('Hypervisor driver does not '
                             'support firewall rules'))
 
-    def _update_state(self, context, instance_id, state=None):
-        """Update the state of an instance from the driver info."""
-        instance_ref = self.db.instance_get(context, instance_id)
-
-        if state is None:
-            try:
-                LOG.debug(_('Checking state of %s'), instance_ref['name'])
-                info = self.driver.get_info(instance_ref['name'])
-            except exception.NotFound:
-                info = None
-
-            if info is not None:
-                state = info['state']
-            else:
-                state = power_state.FAILED
-
-        self.db.instance_set_state(context, instance_id, state)
-        return state
-
-    def _update_launched_at(self, context, instance_id, launched_at=None):
-        """Update the launched_at parameter of the given instance."""
-        data = {'launched_at': launched_at or utils.utcnow()}
-        self.db.instance_update(context, instance_id, data)
-
-    def _update_image_ref(self, context, instance_id, image_ref):
-        """Update the image_id for the given instance."""
-        data = {'image_ref': image_ref}
-        self.db.instance_update(context, instance_id, data)
+    def _get_power_state(self, context, instance):
+        """Retrieve the power state for the given instance."""
+        LOG.debug(_('Checking state of %s'), instance['name'])
+        try:
+            return self.driver.get_info(instance['name'])["state"]
+        except exception.NotFound:
+            return power_state.FAILED
 
     def get_console_topic(self, context, **kwargs):
         """Retrieves the console host for a project on this host.
@@ -388,13 +373,10 @@ class ComputeManager(manager.SchedulerDependentManager):
         # NOTE(vish): used by virt but not in database
         updates['injected_files'] = kwargs.get('injected_files', [])
         updates['admin_pass'] = kwargs.get('admin_password', None)
-        instance = self.db.instance_update(context,
-                                           instance_id,
-                                           updates)
-        self.db.instance_set_state(context,
-                                   instance_id,
-                                   power_state.NOSTATE,
-                                   'networking')
+        updates['vm_state'] = vm_state.BUILD
+        updates['task_state'] = task_state.NETWORKING
+
+        instance = self.db.instance_update(context, instance_id, updates)
 
         is_vpn = instance['image_ref'] == str(FLAGS.vpn_image_id)
         try:
@@ -413,6 +395,11 @@ class ComputeManager(manager.SchedulerDependentManager):
                 # all vif creation and network injection, maybe this is correct
                 network_info = []
 
+            self._instance_update(context,
+                                  instance_id,
+                                  vm_state=vm_state.BUILD,
+                                  task_state=task_state.BLOCK_DEVICE_MAPPING)
+
             (swap, ephemerals,
              block_device_mapping) = self._setup_block_device_mapping(
                 context, instance_id)
@@ -422,9 +409,11 @@ class ComputeManager(manager.SchedulerDependentManager):
                 'ephemerals': ephemerals,
                 'block_device_mapping': block_device_mapping}
 
-            # TODO(vish) check to make sure the availability zone matches
-            self._update_state(context, instance_id, power_state.BUILDING)
+            self._instance_update(context,
+                                  instance_id,
+                                  task_state=task_state.SPAWN)
 
+            # TODO(vish) check to make sure the availability zone matches
             try:
                 self.driver.spawn(context, instance,
                                   network_info, block_device_info)
@@ -433,13 +422,21 @@ class ComputeManager(manager.SchedulerDependentManager):
                         "virtualization enabled in the BIOS? Details: "
                         "%(ex)s") % locals()
                 LOG.exception(msg)
+                return
+
+            current_power_state = self._get_power_state(context, instance)
+            self._instance_update(context,
+                                  instance_id,
+                                  power_state=current_power_state,
+                                  vm_state=vm_state.ACTIVE,
+                                  task_state=None,
+                                  launched_at=utils.utcnow())
 
-            self._update_launched_at(context, instance_id)
-            self._update_state(context, instance_id)
             usage_info = utils.usage_from_instance(instance)
             notifier.notify('compute.%s' % self.host,
                             'compute.instance.create',
                             notifier.INFO, usage_info)
+
         except exception.InstanceNotFound:
             # FIXME(wwolf): We are just ignoring InstanceNotFound
             # exceptions here in case the instance was immediately
@@ -523,11 +520,22 @@ class ComputeManager(manager.SchedulerDependentManager):
         instance_ref = self.db.instance_get(context, instance_id)
         LOG.audit(_("Rebuilding instance %s"), instance_id, context=context)
 
-        self._update_state(context, instance_id, power_state.BUILDING)
+        current_power_state = self._get_power_state(context, instance_ref)
+        self._instance_update(context,
+                              instance_id,
+                              power_state=current_power_state,
+                              vm_state=vm_state.REBUILD,
+                              task_state=task_state.REBUILDING)
 
         network_info = self._get_instance_nw_info(context, instance_ref)
-
         self.driver.destroy(instance_ref, network_info)
+
+        self._instance_update(context,
+                              instance_id,
+                              power_state=current_power_state,
+                              vm_state=vm_state.REBUILD,
+                              task_state=task_state.SPAWN)
+
         image_ref = kwargs.get('image_ref')
         instance_ref.image_ref = image_ref
         instance_ref.injected_files = kwargs.get('injected_files', [])
@@ -536,9 +544,15 @@ class ComputeManager(manager.SchedulerDependentManager):
         bd_mapping = self._setup_block_device_mapping(context, instance_id)
         self.driver.spawn(context, instance_ref, network_info, bd_mapping)
 
-        self._update_image_ref(context, instance_id, image_ref)
-        self._update_launched_at(context, instance_id)
-        self._update_state(context, instance_id)
+        current_power_state = self._get_power_state(context, instance_ref)
+        self._instance_update(context,
+                              instance_id,
+                              power_state=current_power_state,
+                              vm_state=vm_state.ACTIVE,
+                              task_state=None,
+                              image_ref=image_ref,
+                              launched_at=utils.utcnow())
+
         usage_info = utils.usage_from_instance(instance_ref,
                                                image_ref=image_ref)
         notifier.notify('compute.%s' % self.host,
@@ -550,26 +564,34 @@ class ComputeManager(manager.SchedulerDependentManager):
     @checks_instance_lock
     def reboot_instance(self, context, instance_id):
         """Reboot an instance on this host."""
+        LOG.audit(_("Rebooting instance %s"), instance_id, context=context)
         context = context.elevated()
-        self._update_state(context, instance_id)
         instance_ref = self.db.instance_get(context, instance_id)
-        LOG.audit(_("Rebooting instance %s"), instance_id, context=context)
 
-        if instance_ref['state'] != power_state.RUNNING:
-            state = instance_ref['state']
+        current_power_state = self._get_power_state(context, instance_ref)
+        self._instance_update(context,
+                              instance_id,
+                              power_state=current_power_state,
+                              vm_state=vm_state.REBOOT,
+                              task_state=task_state.REBOOTING)
+
+        if instance_ref['power_state'] != power_state.RUNNING:
+            state = instance_ref['power_state']
             running = power_state.RUNNING
             LOG.warn(_('trying to reboot a non-running '
                      'instance: %(instance_id)s (state: %(state)s '
                      'expected: %(running)s)') % locals(),
                      context=context)
 
-        self.db.instance_set_state(context,
-                                   instance_id,
-                                   power_state.NOSTATE,
-                                   'rebooting')
         network_info = self._get_instance_nw_info(context, instance_ref)
         self.driver.reboot(instance_ref, network_info)
-        self._update_state(context, instance_id)
+
+        current_power_state = self._get_power_state(context, instance_ref)
+        self._instance_update(context,
+                              instance_id,
+                              power_state=current_power_state,
+                              vm_state=vm_state.ACTIVE,
+                              task_state=None)
 
     @exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
     def snapshot_instance(self, context, instance_id, image_id,
@@ -585,37 +607,41 @@ class ComputeManager(manager.SchedulerDependentManager):
         :param rotation: int representing how many backups to keep around;
             None if rotation shouldn't be used (as in the case of snapshots)
         """
+        if image_type != "snapshot" and image_type != "backup":
+            raise Exception(_('Image type not recognized %s') % image_type)
+
         context = context.elevated()
         instance_ref = self.db.instance_get(context, instance_id)
 
-        #NOTE(sirp): update_state currently only refreshes the state field
-        # if we add is_snapshotting, we will need this refreshed too,
-        # potentially?
-        self._update_state(context, instance_id)
+        current_power_state = self._get_power_state(context, instance_ref)
+        self._instance_update(context,
+                              instance_id,
+                              power_state=current_power_state,
+                              vm_state=vm_state.ACTIVE,
+                              task_state=image_type)
 
         LOG.audit(_('instance %s: snapshotting'), instance_id,
                   context=context)
-        if instance_ref['state'] != power_state.RUNNING:
-            state = instance_ref['state']
+
+        if instance_ref['power_state'] != power_state.RUNNING:
+            state = instance_ref['power_state']
             running = power_state.RUNNING
             LOG.warn(_('trying to snapshot a non-running '
                        'instance: %(instance_id)s (state: %(state)s '
                        'expected: %(running)s)') % locals())
 
         self.driver.snapshot(context, instance_ref, image_id)
+        self._instance_update(context, instance_id, task_state=None)
+
+        if image_type == 'snapshot' and rotation:
+            raise exception.ImageRotationNotAllowed()
+
+        elif image_type == 'backup' and rotation:
+            instance_uuid = instance_ref['uuid']
+            self.rotate_backups(context, instance_uuid, backup_type, rotation)
 
-        if image_type == 'snapshot':
-            if rotation:
-                raise exception.ImageRotationNotAllowed()
         elif image_type == 'backup':
-            if rotation:
-                instance_uuid = instance_ref['uuid']
-                self.rotate_backups(context, instance_uuid, backup_type,
-                                    rotation)
-            else:
-                raise exception.RotationRequiredForBackup()
-        else:
-            raise Exception(_('Image type not recognized %s') % image_type)
+            raise exception.RotationRequiredForBackup()
 
     def rotate_backups(self, context, instance_uuid, backup_type, rotation):
         """Delete excess backups associated to an instance.
@@ -751,40 +777,51 @@ class ComputeManager(manager.SchedulerDependentManager):
     @checks_instance_lock
     def rescue_instance(self, context, instance_id):
         """Rescue an instance on this host."""
+        LOG.audit(_('instance %s: rescuing'), instance_id, context=context)
         context = context.elevated()
+
+        self._instance_update(context,
+                              instance_id,
+                              vm_state=vm_state.RESCUE,
+                              task_state=task_state.RESCUING)
+
         instance_ref = self.db.instance_get(context, instance_id)
-        LOG.audit(_('instance %s: rescuing'), instance_id, context=context)
-        self.db.instance_set_state(context,
-                                   instance_id,
-                                   power_state.NOSTATE,
-                                   'rescuing')
-        _update_state = lambda result: self._update_state_callback(
-                self, context, instance_id, result)
         network_info = self._get_instance_nw_info(context, instance_ref)
-        self.driver.rescue(context, instance_ref, _update_state, network_info)
-        self._update_state(context, instance_id)
+
+        # NOTE(blamar): None of the virt drivers use the 'callback' param
+        self.driver.rescue(context, instance_ref, None, network_info)
+
+        current_power_state = self._get_power_state(context, instance_ref)
+        self._instance_update(context,
+                              instance_id,
+                              vm_state=vm_state.RESCUE,
+                              task_state=task_state.RESCUED,
+                              power_state=current_power_state)
 
     @exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
     @checks_instance_lock
     def unrescue_instance(self, context, instance_id):
         """Rescue an instance on this host."""
+        LOG.audit(_('instance %s: unrescuing'), instance_id, context=context)
         context = context.elevated()
+
+        self._instance_update(context,
+                              instance_id,
+                              vm_state=vm_state.ACTIVE,
+                              task_state=task_state.UNRESCUING)
+
         instance_ref = self.db.instance_get(context, instance_id)
-        LOG.audit(_('instance %s: unrescuing'), instance_id, context=context)
-        self.db.instance_set_state(context,
-                                   instance_id,
-                                   power_state.NOSTATE,
-                                   'unrescuing')
-        _update_state = lambda result: self._update_state_callback(
-                self, context, instance_id, result)
         network_info = self._get_instance_nw_info(context, instance_ref)
-        self.driver.unrescue(instance_ref, _update_state, network_info)
-        self._update_state(context, instance_id)
 
-    @staticmethod
-    def _update_state_callback(self, context, instance_id, result):
-        """Update instance state when async task completes."""
-        self._update_state(context, instance_id)
+        # NOTE(blamar): None of the virt drivers use the 'callback' param
+        self.driver.unrescue(instance_ref, None, network_info)
+
+        current_power_state = self._get_power_state(context, instance_ref)
+        self._instance_update(context,
+                              instance_id,
+                              vm_state=vm_state.ACTIVE,
+                              task_state=None,
+                              power_state=current_power_state)
 
     @exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
     @checks_instance_lock
@@ -843,11 +880,12 @@ class ComputeManager(manager.SchedulerDependentManager):
 
         # Just roll back the record. There's no need to resize down since
         # the 'old' VM already has the preferred attributes
-        self.db.instance_update(context, instance_ref['uuid'],
-           dict(memory_mb=instance_type['memory_mb'],
-                vcpus=instance_type['vcpus'],
-                local_gb=instance_type['local_gb'],
-                instance_type_id=instance_type['id']))
+        self._instance_update(context,
+                              instance_ref["uuid"],
+                              memory_mb=instance_type['memory_mb'],
+                              vcpus=instance_type['vcpus'],
+                              local_gb=instance_type['local_gb'],
+                              instance_type_id=instance_type['id'])
 
         self.driver.revert_migration(instance_ref)
         self.db.migration_update(context, migration_id,
@@ -1000,35 +1038,45 @@ class ComputeManager(manager.SchedulerDependentManager):
     @checks_instance_lock
     def pause_instance(self, context, instance_id):
         """Pause an instance on this host."""
+        LOG.audit(_('instance %s: pausing'), instance_id, context=context)
         context = context.elevated()
+
+        self._instance_update(context,
+                              instance_id,
+                              vm_state=vm_state.PAUSE,
+                              task_state=task_state.PAUSING)
+
         instance_ref = self.db.instance_get(context, instance_id)
-        LOG.audit(_('instance %s: pausing'), instance_id, context=context)
-        self.db.instance_set_state(context,
-                                   instance_id,
-                                   power_state.NOSTATE,
-                                   'pausing')
-        self.driver.pause(instance_ref,
-            lambda result: self._update_state_callback(self,
-                                                       context,
-                                                       instance_id,
-                                                       result))
+        self.driver.pause(instance_ref, lambda result: None)
+
+        current_power_state = self._get_power_state(context, instance_ref)
+        self._instance_update(context,
+                              instance_id,
+                              power_state=current_power_state,
+                              vm_state=vm_state.PAUSE,
+                              task_state=None)
 
     @exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
     @checks_instance_lock
     def unpause_instance(self, context, instance_id):
         """Unpause a paused instance on this host."""
+        LOG.audit(_('instance %s: unpausing'), instance_id, context=context)
         context = context.elevated()
+
+        self._instance_update(context,
+                              instance_id,
+                              vm_state=vm_state.ACTIVE,
+                              task_state=task_state.UNPAUSING)
+
         instance_ref = self.db.instance_get(context, instance_id)
-        LOG.audit(_('instance %s: unpausing'), instance_id, context=context)
-        self.db.instance_set_state(context,
-                                   instance_id,
-                                   power_state.NOSTATE,
-                                   'unpausing')
-        self.driver.unpause(instance_ref,
-            lambda result: self._update_state_callback(self,
-                                                       context,
-                                                       instance_id,
-                                                       result))
+        self.driver.unpause(instance_ref, lambda result: None)
+
+        current_power_state = self._get_power_state(context, instance_ref)
+        self._instance_update(context,
+                              instance_id,
+                              power_state=current_power_state,
+                              vm_state=vm_state.ACTIVE,
+                              task_state=None)
 
     @exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
     def host_power_action(self, context, host=None, action=None):
@@ -1053,33 +1101,45 @@ class ComputeManager(manager.SchedulerDependentManager):
     @checks_instance_lock
     def suspend_instance(self, context, instance_id):
         """Suspend the given instance."""
+        LOG.audit(_('instance %s: suspending'), instance_id, context=context)
         context = context.elevated()
+
+        self._instance_update(context,
+                              instance_id,
+                              vm_state=vm_state.SUSPEND,
+                              task_state=task_state.SUSPENDING)
+
         instance_ref = self.db.instance_get(context, instance_id)
-        LOG.audit(_('instance %s: suspending'), instance_id, context=context)
-        self.db.instance_set_state(context, instance_id,
-                                            power_state.NOSTATE,
-                                            'suspending')
-        self.driver.suspend(instance_ref,
-            lambda result: self._update_state_callback(self,
-                                                       context,
-                                                       instance_id,
-                                                       result))
+        self.driver.suspend(instance_ref, lambda result: None)
+
+        current_power_state = self._get_power_state(context, instance_ref)
+        self._instance_update(context,
+                              instance_id,
+                              power_state=current_power_state,
+                              vm_state=vm_state.SUSPEND,
+                              task_state=None)
 
     @exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
     @checks_instance_lock
     def resume_instance(self, context, instance_id):
         """Resume the given suspended instance."""
+        LOG.audit(_('instance %s: resuming'), instance_id, context=context)
         context = context.elevated()
+
+        self._instance_update(context,
+                              instance_id,
+                              vm_state=vm_state.ACTIVE,
+                              task_state=task_state.RESUMING)
+
         instance_ref = self.db.instance_get(context, instance_id)
-        LOG.audit(_('instance %s: resuming'), instance_id, context=context)
-        self.db.instance_set_state(context, instance_id,
-                                            power_state.NOSTATE,
-                                            'resuming')
-        self.driver.resume(instance_ref,
-            lambda result: self._update_state_callback(self,
-                                                       context,
-                                                       instance_id,
-                                                       result))
+        self.driver.resume(instance_ref, lambda result: None)
+
+        current_power_state = self._get_power_state(context, instance_ref)
+        self._instance_update(context,
+                              instance_id,
+                              power_state=current_power_state,
+                              vm_state=vm_state.ACTIVE,
+                              task_state=None)
 
     @exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
     def lock_instance(self, context, instance_id):
@@ -1489,11 +1549,14 @@ class ComputeManager(manager.SchedulerDependentManager):
                                'block_migration': block_migration}})
 
         # Restore instance state
-        self.db.instance_update(ctxt,
-                                instance_ref['id'],
-                                {'state_description': 'running',
-                                 'state': power_state.RUNNING,
-                                 'host': dest})
+        current_power_state = self._get_power_state(ctxt, instance_ref)
+        self._instance_update(ctxt,
+                              instance_ref["id"],
+                              host=dest,
+                              power_state=current_power_state,
+                              vm_state=vm_state.ACTIVE,
+                              task_state=None)
+
         # Restore volume state
         for volume_ref in instance_ref['volumes']:
             volume_id = volume_ref['id']
@@ -1539,11 +1602,11 @@ class ComputeManager(manager.SchedulerDependentManager):
             This param specifies destination host.
         """
         host = instance_ref['host']
-        self.db.instance_update(context,
-                                instance_ref['id'],
-                                {'state_description': 'running',
-                                 'state': power_state.RUNNING,
-                                 'host': host})
+        self._instance_update(context,
+                              instance_ref['id'],
+                              host=host,
+                              vm_state=vm_state.ACTIVE,
+                              task_state=None)
 
         for volume_ref in instance_ref['volumes']:
             volume_id = volume_ref['id']
@@ -1591,10 +1654,9 @@ class ComputeManager(manager.SchedulerDependentManager):
             error_list.append(ex)
 
         try:
-            self._poll_instance_states(context)
+            self._sync_power_states(context)
         except Exception as ex:
-            LOG.warning(_("Error during instance poll: %s"),
-                        unicode(ex))
+            LOG.warning(_("Error during power_state sync: %s"), unicode(ex))
             error_list.append(ex)
 
         return error_list
@@ -1609,68 +1671,39 @@ class ComputeManager(manager.SchedulerDependentManager):
             self.update_service_capabilities(
                 self.driver.get_host_stats(refresh=True))
 
-    def _poll_instance_states(self, context):
-        vm_instances = self.driver.list_instances_detail()
-        vm_instances = dict((vm.name, vm) for vm in vm_instances)
+    def _sync_power_states(self, context):
+        """Align power states between the database and the hypervisor.
 
-        # Keep a list of VMs not in the DB, cross them off as we find them
-        vms_not_found_in_db = list(vm_instances.keys())
+        The hypervisor is authoritative for the power_state data, so we
+        simply loop over all known instances for this host and update the
+        power_state according to the hypervisor. If the instance is not found
+        then it will be set to power_state.NOSTATE, because it doesn't exist
+        on the hypervisor.
 
+        """
+        vm_instances = self.driver.list_instances_detail()
         db_instances = self.db.instance_get_all_by_host(context, self.host)
 
+        num_vm_instances = len(vm_instances)
+        num_db_instances = len(db_instances)
+
+        if num_vm_instances != num_db_instances:
+            LOG.info(_("Found %(num_db_instances)s in the database and "
+                       "%(num_vm_instances)s on the hypervisor.") % locals())
+
         for db_instance in db_instances:
-            name = db_instance['name']
-            db_state = db_instance['state']
+            name = db_instance["name"]
+            db_power_state = db_instance['power_state']
             vm_instance = vm_instances.get(name)
 
             if vm_instance is None:
-                # NOTE(justinsb): We have to be very careful here, because a
-                # concurrent operation could be in progress (e.g. a spawn)
-                if db_state == power_state.BUILDING:
-                    # TODO(justinsb): This does mean that if we crash during a
-                    # spawn, the machine will never leave the spawning state,
-                    # but this is just the way nova is; this function isn't
-                    # trying to correct that problem.
-                    # We could have a separate task to correct this error.
-                    # TODO(justinsb): What happens during a live migration?
-                    LOG.info(_("Found instance '%(name)s' in DB but no VM. "
-                               "State=%(db_state)s, so assuming spawn is in "
-                               "progress.") % locals())
-                    vm_state = db_state
-                else:
-                    LOG.info(_("Found instance '%(name)s' in DB but no VM. "
-                               "State=%(db_state)s, so setting state to "
-                               "shutoff.") % locals())
-                    vm_state = power_state.SHUTOFF
-                    if db_instance['state_description'] == 'stopping':
-                        self.db.instance_stop(context, db_instance['id'])
-                        continue
+                vm_power_state = power_state.NOSTATE
             else:
-                vm_state = vm_instance.state
-                vms_not_found_in_db.remove(name)
-
-            if (db_instance['state_description'] in ['migrating', 'stopping']):
-                # A situation which db record exists, but no instance"
-                # sometimes occurs while live-migration at src compute,
-                # this case should be ignored.
-                LOG.debug(_("Ignoring %(name)s, as it's currently being "
-                           "migrated.") % locals())
-                continue
-
-            if vm_state != db_state:
-                LOG.info(_("DB/VM state mismatch. Changing state from "
-                           "'%(db_state)s' to '%(vm_state)s'") % locals())
-                self._update_state(context, db_instance['id'], vm_state)
+                vm_power_state = vm_instance["power_state"]
 
-            # NOTE(justinsb): We no longer auto-remove SHUTOFF instances
-            # It's quite hard to get them back when we do.
-
-        # Are there VMs not in the DB?
-        for vm_not_found_in_db in vms_not_found_in_db:
-            name = vm_not_found_in_db
+            if vm_power_state == db_power_state:
+                continue
 
-            # We only care about instances that compute *should* know about
-            if name.startswith("instance-"):
-                # TODO(justinsb): What to do here?  Adopt it?  Shut it down?
-                LOG.warning(_("Found VM not in DB: '%(name)s'.  Ignoring")
-                            % locals())
+            self._instance_update(context,
+                                  db_instance["id"],
+                                  power_state=vm_power_state)
diff --git a/nova/compute/task_state.py b/nova/compute/task_state.py
index b4dc9af51..55466c783 100644
--- a/nova/compute/task_state.py
+++ b/nova/compute/task_state.py
@@ -17,12 +17,27 @@
 
 """Possible task states for instances"""
 
-BUILD_BLOCK_DEVICE_MAPPING='block_device_mapping'
+BLOCK_DEVICE_MAPPING='block_device_mapping'
 NETWORKING='networking'
+SPAWN='spawn'
 
+SNAPSHOT='snapshot'
+BACKUP='backup'
 PASSWORD='password'
 
 RESIZE_PREP='resize_prep'
 RESIZE_MIGRATING='resize_migrating'
 RESIZE_MIGRATED='resize_migrated'
 RESIZE_FINISH='resize_finish'
+
+REBUILDING='rebuilding'
+
+REBOOTING='rebooting'
+PAUSING='pausing'
+UNPAUSING='unpausing'
+SUSPENDING='suspending'
+RESUMING='resuming'
+
+RESCUING='rescuing'
+RESCUED='rescued'
+UNRESCUING='unrescuing'
diff --git a/nova/compute/vm_state.py b/nova/compute/vm_state.py
index e81cba1f0..a1bca6ef4 100644
--- a/nova/compute/vm_state.py
+++ b/nova/compute/vm_state.py
@@ -17,19 +17,15 @@
 
 """Possible vm states for instances"""
 
+ACTIVE='active'
 BUILD='build'
 REBUILD='rebuild'
 REBOOT='reboot'
 DELETE='delete'
 STOP='stop'
-START='start'
+MIGRATE='migrate'
 RESIZE='resize'
 VERIFY_RESIZE='verify_resize'
 PAUSE='pause'
-UNPAUSE='unpause'
-
 SUSPEND='suspend'
-RESUME='resume'
-
 RESCUE='rescue'
-UNRESCUE='unrescue'
author	Brian Lamar <brian.lamar@rackspace.com>	2011-08-17 16:23:40 -0400
committer	Brian Lamar <brian.lamar@rackspace.com>	2011-08-17 16:23:40 -0400
commit	1d1d027554d6be355bd9b52b2d87081d06f05045 (patch)
tree	5aab070465f439eef7a0b147abc09e8368dffee2
parent	bd2e98c064b7c1e9c866f3013e13af7883e11e05 (diff)