diff options
| author | Isaku Yamahata <yamahata@valinux.co.jp> | 2011-12-14 16:15:52 +0900 |
|---|---|---|
| committer | Isaku Yamahata <yamahata@valinux.co.jp> | 2012-01-12 18:46:49 +0900 |
| commit | 932f3aafd1c735a8ec9e158a94ebb983d6baeb0e (patch) | |
| tree | e167a00a6cbc64d31ff5f3c7a3a5b42d2c01306d /nova/compute | |
| parent | 6ece432be0cfb7356636806ab3c046eff17d494b (diff) | |
Unbreak start instance and fixes bug 905270
This patch fixes the bug 905270
https://bugs.launchpad.net/nova/+bug/905270
According to EC2 documentation, EBS-backed instances that initiate
shutdown result in the stopped state, and they can then be started
again. (On the other hand, non-EBS instances result in the terminated
state when the instance initiates shutdown.)
However, in the current nova implementation, a shut-down instance always
results in terminated status. The related issues are:
- describe-instance-attribute instance_initiated_shutdown_behavior doesn't
work correctly
- instance attribute disable_api_termination isn't supported
- stop instance was broken by the change set of the following.
It needs unbreak.
> commit eb03d47fecd3bfc24243da29ee01679b334a08fe
> Author: Vishvananda Ishaya <vishvananda@gmail.com>
> Date: Fri Sep 23 09:22:32 2011 -0700
>
> Remove AoE, Clean up volume code
>
> * Removes Ata Over Ethernet
> * Adds drivers to libvirt for volumes
> * Adds initialize_connection and terminate_connection to volume api
> * Passes connection info back through volume api
>
> Change-Id: I1b1626f40bebe8466ab410fb174683293c7c474f
This patch
- unbreak start instance
- implement instance_initiated_shutdown_behavior and make it EC2 compatible
- implement disable_api_termination
---
Changes 5 -> 6:
- fixes to catch up 26b7b9457a5899ecca93fd67d3879efcad4e4968
Changes 4 -> 5:
- HACKING compliance
Changes 3 -> 4:
- rebased to 4c5586a28fd7a085369c49f6039876ffdc86b526
sqlalchemy migrate version
Changes 2 -> 3:
- rename long name to shorter one
s/instance_initiated_shutdown_behavior/shutdown_terminate/g
s/disable_api_termination/disable_terminate/g
as suggested by Kevin L. Mitchell
- improved nova.api.ec2.cloud._state_description
- pep8
- broken out patches are available for easy review at
git://github.com/yamahata/nova.git lp905270-2
Changes 1 -> 2:
- fixed a unit test failure pointed out by Mark.
(I think the ebtables failure strongly suggests an installation problem)
- introduce vm_states.SHUTOFF and put instance state which is in
power_state.{NOSTATE, SHUTOFF} into vm_states.SHUTOFF.
- simplified logic a bit by vm_states.SHUTOFF as suggested by Vish.
- instance_initiated_shutdown_behavior:String(255)
=>
instance_initiated_shutdown_terminate:Boolean()
as suggested by Vish.
- Added Johannes Erdfelt to reviewers as they wrote the vm_states state
machine checker.
I'd have liked to add David Subiros as well, but he doesn't seem to be a
registered user of the gerrit.
Change-Id: Ibeb94f65137feadad2c343913b39195e3f96a35e
Diffstat (limited to 'nova/compute')
| -rw-r--r-- | nova/compute/api.py | 70 | ||||
| -rw-r--r-- | nova/compute/manager.py | 25 | ||||
| -rw-r--r-- | nova/compute/vm_states.py | 1 |
3 files changed, 64 insertions, 32 deletions
diff --git a/nova/compute/api.py b/nova/compute/api.py index 13136079c..4b4985cd4 100644 --- a/nova/compute/api.py +++ b/nova/compute/api.py @@ -483,6 +483,11 @@ class API(base.Base): updates['vm_state'] = vm_states.BUILDING updates['task_state'] = task_states.SCHEDULING + if (image['properties'].get('mappings', []) or + image['properties'].get('block_device_mapping', []) or + block_device_mapping): + updates['shutdown_terminate'] = False + instance = self.update(context, instance, **updates) return instance @@ -771,13 +776,17 @@ class API(base.Base): rv = self.db.instance_update(context, instance["id"], kwargs) return dict(rv.iteritems()) - @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.ERROR]) + @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.SHUTOFF, + vm_states.ERROR]) @scheduler_api.reroute_compute("soft_delete") def soft_delete(self, context, instance): """Terminate an instance.""" instance_uuid = instance["uuid"] LOG.debug(_("Going to try to soft delete %s"), instance_uuid) + if instance['disable_terminate']: + return + # NOTE(jerdfelt): The compute daemon handles reclaiming instances # that are in soft delete. If there is no host assigned, there is # no daemon to reclaim, so delete it immediately. 
@@ -812,13 +821,17 @@ class API(base.Base): # NOTE(jerdfelt): The API implies that only ACTIVE and ERROR are # allowed but the EC2 API appears to allow from RESCUED and STOPPED # too - @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.ERROR, - vm_states.RESCUED, vm_states.STOPPED]) + @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.SHUTOFF, + vm_states.ERROR, vm_states.RESCUED, + vm_states.STOPPED]) @scheduler_api.reroute_compute("delete") def delete(self, context, instance): """Terminate an instance.""" LOG.debug(_("Going to try to terminate %s"), instance["uuid"]) + if instance['disable_terminate']: + return + self._delete(context, instance) @check_instance_state(vm_state=[vm_states.SOFT_DELETE]) @@ -845,10 +858,11 @@ class API(base.Base): """Force delete a previously deleted (but not reclaimed) instance.""" self._delete(context, instance) - @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.RESCUED], + @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.SHUTOFF, + vm_states.RESCUED], task_state=[None, task_states.RESIZE_VERIFY]) @scheduler_api.reroute_compute("stop") - def stop(self, context, instance): + def stop(self, context, instance, do_cast=True): """Stop an instance.""" instance_uuid = instance["uuid"] LOG.debug(_("Going to try to stop %s"), instance_uuid) @@ -861,21 +875,31 @@ class API(base.Base): progress=0) host = instance['host'] - if host: + if not host: + return + + if do_cast: self._cast_compute_message('stop_instance', context, instance_uuid, host) + else: + self._call_compute_message('stop_instance', context, instance) - @check_instance_state(vm_state=[vm_states.STOPPED]) + @check_instance_state(vm_state=[vm_states.STOPPED, vm_states.SHUTOFF]) def start(self, context, instance): """Start an instance.""" vm_state = instance["vm_state"] instance_uuid = instance["uuid"] LOG.debug(_("Going to try to start %s"), instance_uuid) - if vm_state != vm_states.STOPPED: - LOG.warning(_("Instance %(instance_uuid)s is not 
" - "stopped. (%(vm_state)s)") % locals()) - return + if vm_state == vm_states.SHUTOFF: + if instance['shutdown_terminate']: + LOG.warning(_("Instance %(instance_uuid)s is not " + "stopped. (%(vm_state)s") % locals()) + return + + # NOTE(yamahata): nova compute doesn't reap instances + # which initiated shutdown itself. So reap it here. + self.stop(context, instance, do_cast=False) self.update(context, instance, @@ -1077,7 +1101,7 @@ class API(base.Base): raise exception.Error(_("Unable to find host for Instance %s") % instance_uuid) - @check_instance_state(vm_state=[vm_states.ACTIVE], + @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.SHUTOFF], task_state=[None, task_states.RESIZE_VERIFY]) @scheduler_api.reroute_compute("backup") def backup(self, context, instance, name, backup_type, rotation, @@ -1096,7 +1120,7 @@ class API(base.Base): extra_properties=extra_properties) return recv_meta - @check_instance_state(vm_state=[vm_states.ACTIVE], + @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.SHUTOFF], task_state=[None, task_states.RESIZE_VERIFY]) @scheduler_api.reroute_compute("snapshot") def snapshot(self, context, instance, name, extra_properties=None): @@ -1175,7 +1199,8 @@ class API(base.Base): return min_ram, min_disk - @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.RESCUED], + @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.SHUTOFF, + vm_states.RESCUED], task_state=[None, task_states.RESIZE_VERIFY]) @scheduler_api.reroute_compute("reboot") def reboot(self, context, instance, reboot_type): @@ -1191,7 +1216,7 @@ class API(base.Base): instance['uuid'], params={'reboot_type': reboot_type}) - @check_instance_state(vm_state=[vm_states.ACTIVE], + @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.SHUTOFF], task_state=[None, task_states.RESIZE_VERIFY]) @scheduler_api.reroute_compute("rebuild") def rebuild(self, context, instance, image_href, admin_password, **kwargs): @@ -1221,7 +1246,7 @@ class API(base.Base): 
instance["uuid"], params=rebuild_params) - @check_instance_state(vm_state=[vm_states.ACTIVE], + @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.SHUTOFF], task_state=[task_states.RESIZE_VERIFY]) @scheduler_api.reroute_compute("revert_resize") def revert_resize(self, context, instance): @@ -1247,7 +1272,7 @@ class API(base.Base): self.db.migration_update(context, migration_ref['id'], {'status': 'reverted'}) - @check_instance_state(vm_state=[vm_states.ACTIVE], + @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.SHUTOFF], task_state=[task_states.RESIZE_VERIFY]) @scheduler_api.reroute_compute("confirm_resize") def confirm_resize(self, context, instance): @@ -1275,7 +1300,7 @@ class API(base.Base): self.db.instance_update(context, instance['uuid'], {'host': migration_ref['dest_compute'], }) - @check_instance_state(vm_state=[vm_states.ACTIVE], + @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.SHUTOFF], task_state=[None]) @scheduler_api.reroute_compute("resize") def resize(self, context, instance, flavor_id=None): @@ -1358,7 +1383,8 @@ class API(base.Base): # didn't raise so this is the correct zone self.network_api.add_network_to_project(context, project_id) - @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.RESCUED], + @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.SHUTOFF, + vm_states.RESCUED], task_state=[None, task_states.RESIZE_VERIFY]) @scheduler_api.reroute_compute("pause") def pause(self, context, instance): @@ -1408,7 +1434,8 @@ class API(base.Base): """Retrieve actions for the given instance.""" return self.db.instance_get_actions(context, instance['uuid']) - @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.RESCUED], + @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.SHUTOFF, + vm_states.RESCUED], task_state=[None, task_states.RESIZE_VERIFY]) @scheduler_api.reroute_compute("suspend") def suspend(self, context, instance): @@ -1431,7 +1458,8 @@ class API(base.Base): 
task_state=task_states.RESUMING) self._cast_compute_message('resume_instance', context, instance_uuid) - @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED], + @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.SHUTOFF, + vm_states.STOPPED], task_state=[None, task_states.RESIZE_VERIFY]) @scheduler_api.reroute_compute("rescue") def rescue(self, context, instance, rescue_password=None): diff --git a/nova/compute/manager.py b/nova/compute/manager.py index cf373570d..58b179464 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -571,7 +571,7 @@ class ComputeManager(manager.SchedulerDependentManager): # I think start will fail due to the files still self._run_instance(context, instance_uuid) - def _shutdown_instance(self, context, instance, action_str, cleanup): + def _shutdown_instance(self, context, instance, action_str): """Shutdown an instance on this host.""" context = context.elevated() instance_id = instance['id'] @@ -592,7 +592,7 @@ class ComputeManager(manager.SchedulerDependentManager): bdms = self._get_instance_volume_bdms(context, instance_id) block_device_info = self._get_instance_volume_block_device_info( context, instance_id) - self.driver.destroy(instance, network_info, block_device_info, cleanup) + self.driver.destroy(instance, network_info, block_device_info) for bdm in bdms: try: # NOTE(vish): actual driver detach done in driver.destroy, so @@ -616,7 +616,7 @@ class ComputeManager(manager.SchedulerDependentManager): def _delete_instance(self, context, instance): """Delete an instance on this host.""" instance_id = instance['id'] - self._shutdown_instance(context, instance, 'Terminating', True) + self._shutdown_instance(context, instance, 'Terminating') self._cleanup_volumes(context, instance_id) self._instance_update(context, instance_id, @@ -646,12 +646,8 @@ class ComputeManager(manager.SchedulerDependentManager): @wrap_instance_fault def stop_instance(self, context, instance_uuid): """Stopping an instance 
on this host.""" - # FIXME(vish): I've kept the files during stop instance, but - # I think start will fail due to the files still - # existing. I don't really know what the purpose of - # stop and start are when compared to pause and unpause instance = self.db.instance_get_by_uuid(context, instance_uuid) - self._shutdown_instance(context, instance, 'Stopping', False) + self._shutdown_instance(context, instance, 'Stopping') self._instance_update(context, instance_uuid, vm_state=vm_states.STOPPED, @@ -2030,9 +2026,16 @@ class ComputeManager(manager.SchedulerDependentManager): if vm_power_state == db_power_state: continue - self._instance_update(context, - db_instance["id"], - power_state=vm_power_state) + if (vm_power_state in (power_state.NOSTATE, power_state.SHUTOFF) + and db_instance['vm_state'] == vm_states.ACTIVE): + self._instance_update(context, + db_instance["id"], + power_state=vm_power_state, + vm_state=vm_states.SHUTOFF) + else: + self._instance_update(context, + db_instance["id"], + power_state=vm_power_state) @manager.periodic_task def _reclaim_queued_deletes(self, context): diff --git a/nova/compute/vm_states.py b/nova/compute/vm_states.py index f219bf7f4..1d0aa6d62 100644 --- a/nova/compute/vm_states.py +++ b/nova/compute/vm_states.py @@ -29,6 +29,7 @@ REBUILDING = 'rebuilding' PAUSED = 'paused' SUSPENDED = 'suspended' +SHUTOFF = 'shutoff' RESCUED = 'rescued' DELETED = 'deleted' STOPPED = 'stopped' |
