diff options
| author | Isaku Yamahata <yamahata@valinux.co.jp> | 2011-12-14 16:15:52 +0900 |
|---|---|---|
| committer | Isaku Yamahata <yamahata@valinux.co.jp> | 2012-01-12 18:46:49 +0900 |
| commit | 932f3aafd1c735a8ec9e158a94ebb983d6baeb0e (patch) | |
| tree | e167a00a6cbc64d31ff5f3c7a3a5b42d2c01306d /nova/compute | |
| parent | 6ece432be0cfb7356636806ab3c046eff17d494b (diff) | |
Unbreak start instance and fixes bug 905270
This patch fixes the bug 905270
https://bugs.launchpad.net/nova/+bug/905270
According to EC2 documentation, EBS-backed instances that initiate
shutdown result in the stopped state, and they can then be started
again. (On the other hand, non-EBS instances result in the terminated
state when the instance initiates shutdown.)
However, in the current nova implementation, a shut-down instance always
results in terminated status. The related issues are:
- describe-instance-attribute instance_initiated_shutdown_behavior doesn't
work correctly
- instance attribute disable_api_termination isn't supported
- stop instance was broken by the change set of the following.
It needs unbreak.
> commit eb03d47fecd3bfc24243da29ee01679b334a08fe
> Author: Vishvananda Ishaya <vishvananda@gmail.com>
> Date: Fri Sep 23 09:22:32 2011 -0700
>
> Remove AoE, Clean up volume code
>
> * Removes Ata Over Ethernet
> * Adds drivers to libvirt for volumes
> * Adds initialize_connection and terminate_connection to volume api
> * Passes connection info back through volume api
>
> Change-Id: I1b1626f40bebe8466ab410fb174683293c7c474f
This patch
- unbreak start instance
- implement instance_initiated_shutdown_behavior and make it EC2 compatible
- implement disable_api_termination
---
Changes 5 -> 6:
- fixes to catch up 26b7b9457a5899ecca93fd67d3879efcad4e4968
Changes 4 -> 5:
- HACKING compliance
Changes 3 -> 4:
- rebased to 4c5586a28fd7a085369c49f6039876ffdc86b526
sqlalchemy migrate version
Changes 2 -> 3:
- rename long name to shorter one
s/instance_initiated_shutdown_behavior/shutdown_terminate/g
s/disable_api_termination/disable_terminate/g
as suggested by Kevin L. Mitchell
- improved nova.api.ec2.cloud._state_description
- pep8
- broken out patches are available for easy review at
git://github.com/yamahata/nova.git lp905270-2
Changes 1 -> 2:
- fixed a unit test failure pointed out by Mark.
(I think the ebtables failure strongly suggests an installation problem)
- introduce vm_states.SHUTOFF and put instance state which is in
power_state.{NOSTATE, SHUTOFF} into vm_states.SHUTOFF.
- simplified logic a bit by vm_states.SHUTOFF as suggested by Vish.
- instance_initiated_shutdown_behavior:String(255)
=>
instance_initiated_shutdown_terminate:Boolean()
as suggested by Vish.
- Added Johannes Erdfelt to reviewers as they wrote the vm_states state
machine checker.
I'd have liked to add David Subiros as well, but he doesn't seem to be a
registered user of the gerrit.
Change-Id: Ibeb94f65137feadad2c343913b39195e3f96a35e
Diffstat (limited to 'nova/compute')
| -rw-r--r-- | nova/compute/api.py | 70 | ||||
| -rw-r--r-- | nova/compute/manager.py | 25 | ||||
| -rw-r--r-- | nova/compute/vm_states.py | 1 |
3 files changed, 64 insertions, 32 deletions
diff --git a/nova/compute/api.py b/nova/compute/api.py index 13136079c..4b4985cd4 100644 --- a/nova/compute/api.py +++ b/nova/compute/api.py @@ -483,6 +483,11 @@ class API(base.Base): updates['vm_state'] = vm_states.BUILDING updates['task_state'] = task_states.SCHEDULING + if (image['properties'].get('mappings', []) or + image['properties'].get('block_device_mapping', []) or + block_device_mapping): + updates['shutdown_terminate'] = False + instance = self.update(context, instance, **updates) return instance @@ -771,13 +776,17 @@ class API(base.Base): rv = self.db.instance_update(context, instance["id"], kwargs) return dict(rv.iteritems()) - @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.ERROR]) + @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.SHUTOFF, + vm_states.ERROR]) @scheduler_api.reroute_compute("soft_delete") def soft_delete(self, context, instance): """Terminate an instance.""" instance_uuid = instance["uuid"] LOG.debug(_("Going to try to soft delete %s"), instance_uuid) + if instance['disable_terminate']: + return + # NOTE(jerdfelt): The compute daemon handles reclaiming instances # that are in soft delete. If there is no host assigned, there is # no daemon to reclaim, so delete it immediately. 
@@ -812,13 +821,17 @@ class API(base.Base): # NOTE(jerdfelt): The API implies that only ACTIVE and ERROR are # allowed but the EC2 API appears to allow from RESCUED and STOPPED # too - @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.ERROR, - vm_states.RESCUED, vm_states.STOPPED]) + @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.SHUTOFF, + vm_states.ERROR, vm_states.RESCUED, + vm_states.STOPPED]) @scheduler_api.reroute_compute("delete") def delete(self, context, instance): """Terminate an instance.""" LOG.debug(_("Going to try to terminate %s"), instance["uuid"]) + if instance['disable_terminate']: + return + self._delete(context, instance) @check_instance_state(vm_state=[vm_states.SOFT_DELETE]) @@ -845,10 +858,11 @@ class API(base.Base): """Force delete a previously deleted (but not reclaimed) instance.""" self._delete(context, instance) - @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.RESCUED], + @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.SHUTOFF, + vm_states.RESCUED], task_state=[None, task_states.RESIZE_VERIFY]) @scheduler_api.reroute_compute("stop") - def stop(self, context, instance): + def stop(self, context, instance, do_cast=True): """Stop an instance.""" instance_uuid = instance["uuid"] LOG.debug(_("Going to try to stop %s"), instance_uuid) @@ -861,21 +875,31 @@ class API(base.Base): progress=0) host = instance['host'] - if host: + if not host: + return + + if do_cast: self._cast_compute_message('stop_instance', context, instance_uuid, host) + else: + self._call_compute_message('stop_instance', context, instance) - @check_instance_state(vm_state=[vm_states.STOPPED]) + @check_instance_state(vm_state=[vm_states.STOPPED, vm_states.SHUTOFF]) def start(self, context, instance): """Start an instance.""" vm_state = instance["vm_state"] instance_uuid = instance["uuid"] LOG.debug(_("Going to try to start %s"), instance_uuid) - if vm_state != vm_states.STOPPED: - LOG.warning(_("Instance %(instance_uuid)s is not 
" - "stopped. (%(vm_state)s)") % locals()) - return + if vm_state == vm_states.SHUTOFF: + if instance['shutdown_terminate']: + LOG.warning(_("Instance %(instance_uuid)s is not " + "stopped. (%(vm_state)s") % locals()) + return + + # NOTE(yamahata): nova compute doesn't reap instances + # which initiated shutdown itself. So reap it here. + self.stop(context, instance, do_cast=False) self.update(context, instance, @@ -1077,7 +1101,7 @@ class API(base.Base): raise exception.Error(_("Unable to find host for Instance %s") % instance_uuid) - @check_instance_state(vm_state=[vm_states.ACTIVE], + @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.SHUTOFF], task_state=[None, task_states.RESIZE_VERIFY]) @scheduler_api.reroute_compute("backup") def backup(self, context, instance, name, backup_type, rotation, @@ -1096,7 +1120,7 @@ class API(base.Base): extra_properties=extra_properties) return recv_meta - @check_instance_state(vm_state=[vm_states.ACTIVE], + @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.SHUTOFF], task_state=[None, task_states.RESIZE_VERIFY]) @scheduler_api.reroute_compute("snapshot") def snapshot(self, context, instance, name, extra_properties=None): @@ -1175,7 +1199,8 @@ class API(base.Base): return min_ram, min_disk - @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.RESCUED], + @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.SHUTOFF, + vm_states.RESCUED], task_state=[None, task_states.RESIZE_VERIFY]) @scheduler_api.reroute_compute("reboot") def reboot(self, context, instance, reboot_type): @@ -1191,7 +1216,7 @@ class API(base.Base): instance['uuid'], params={'reboot_type': reboot_type}) - @check_instance_state(vm_state=[vm_states.ACTIVE], + @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.SHUTOFF], task_state=[None, task_states.RESIZE_VERIFY]) @scheduler_api.reroute_compute("rebuild") def rebuild(self, context, instance, image_href, admin_password, **kwargs): @@ -1221,7 +1246,7 @@ class API(base.Base): 
instance["uuid"], params=rebuild_params) - @check_instance_state(vm_state=[vm_states.ACTIVE], + @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.SHUTOFF], task_state=[task_states.RESIZE_VERIFY]) @scheduler_api.reroute_compute("revert_resize") def revert_resize(self, context, instance): @@ -1247,7 +1272,7 @@ class API(base.Base): self.db.migration_update(context, migration_ref['id'], {'status': 'reverted'}) - @check_instance_state(vm_state=[vm_states.ACTIVE], + @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.SHUTOFF], task_state=[task_states.RESIZE_VERIFY]) @scheduler_api.reroute_compute("confirm_resize") def confirm_resize(self, context, instance): @@ -1275,7 +1300,7 @@ class API(base.Base): self.db.instance_update(context, instance['uuid'], {'host': migration_ref['dest_compute'], }) - @check_instance_state(vm_state=[vm_states.ACTIVE], + @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.SHUTOFF], task_state=[None]) @scheduler_api.reroute_compute("resize") def resize(self, context, instance, flavor_id=None): @@ -1358,7 +1383,8 @@ class API(base.Base): # didn't raise so this is the correct zone self.network_api.add_network_to_project(context, project_id) - @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.RESCUED], + @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.SHUTOFF, + vm_states.RESCUED], task_state=[None, task_states.RESIZE_VERIFY]) @scheduler_api.reroute_compute("pause") def pause(self, context, instance): @@ -1408,7 +1434,8 @@ class API(base.Base): """Retrieve actions for the given instance.""" return self.db.instance_get_actions(context, instance['uuid']) - @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.RESCUED], + @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.SHUTOFF, + vm_states.RESCUED], task_state=[None, task_states.RESIZE_VERIFY]) @scheduler_api.reroute_compute("suspend") def suspend(self, context, instance): @@ -1431,7 +1458,8 @@ class API(base.Base): 
task_state=task_states.RESUMING) self._cast_compute_message('resume_instance', context, instance_uuid) - @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED], + @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.SHUTOFF, + vm_states.STOPPED], task_state=[None, task_states.RESIZE_VERIFY]) @scheduler_api.reroute_compute("rescue") def rescue(self, context, instance, rescue_password=None): diff --git a/nova/compute/manager.py b/nova/compute/manager.py index cf373570d..58b179464 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -571,7 +571,7 @@ class ComputeManager(manager.SchedulerDependentManager): # I think start will fail due to the files still self._run_instance(context, instance_uuid) - def _shutdown_instance(self, context, instance, action_str, cleanup): + def _shutdown_instance(self, context, instance, action_str): """Shutdown an instance on this host.""" context = context.elevated() instance_id = instance['id'] @@ -592,7 +592,7 @@ class ComputeManager(manager.SchedulerDependentManager): bdms = self._get_instance_volume_bdms(context, instance_id) block_device_info = self._get_instance_volume_block_device_info( context, instance_id) - self.driver.destroy(instance, network_info, block_device_info, cleanup) + self.driver.destroy(instance, network_info, block_device_info) for bdm in bdms: try: # NOTE(vish): actual driver detach done in driver.destroy, so @@ -616,7 +616,7 @@ class ComputeManager(manager.SchedulerDependentManager): def _delete_instance(self, context, instance): """Delete an instance on this host.""" instance_id = instance['id'] - self._shutdown_instance(context, instance, 'Terminating', True) + self._shutdown_instance(context, instance, 'Terminating') self._cleanup_volumes(context, instance_id) self._instance_update(context, instance_id, @@ -646,12 +646,8 @@ class ComputeManager(manager.SchedulerDependentManager): @wrap_instance_fault def stop_instance(self, context, instance_uuid): """Stopping an instance 
on this host.""" - # FIXME(vish): I've kept the files during stop instance, but - # I think start will fail due to the files still - # existing. I don't really know what the purpose of - # stop and start are when compared to pause and unpause instance = self.db.instance_get_by_uuid(context, instance_uuid) - self._shutdown_instance(context, instance, 'Stopping', False) + self._shutdown_instance(context, instance, 'Stopping') self._instance_update(context, instance_uuid, vm_state=vm_states.STOPPED, @@ -2030,9 +2026,16 @@ class ComputeManager(manager.SchedulerDependentManager): if vm_power_state == db_power_state: continue - self._instance_update(context, - db_instance["id"], - power_state=vm_power_state) + if (vm_power_state in (power_state.NOSTATE, power_state.SHUTOFF) + and db_instance['vm_state'] == vm_states.ACTIVE): + self._instance_update(context, + db_instance["id"], + power_state=vm_power_state, + vm_state=vm_states.SHUTOFF) + else: + self._instance_update(context, + db_instance["id"], + power_state=vm_power_state) @manager.periodic_task def _reclaim_queued_deletes(self, context): diff --git a/nova/compute/vm_states.py b/nova/compute/vm_states.py index f219bf7f4..1d0aa6d62 100644 --- a/nova/compute/vm_states.py +++ b/nova/compute/vm_states.py @@ -29,6 +29,7 @@ REBUILDING = 'rebuilding' PAUSED = 'paused' SUSPENDED = 'suspended' +SHUTOFF = 'shutoff' RESCUED = 'rescued' DELETED = 'deleted' STOPPED = 'stopped' |
