summaryrefslogtreecommitdiffstats
path: root/nova/compute
diff options
context:
space:
mode:
authorJohannes Erdfelt <johannes.erdfelt@rackspace.com>2011-09-21 15:54:30 +0000
committerTarmac <>2011-09-21 15:54:30 +0000
commit7e3bebbe8e911851a7398b8d5ad81afb421dfd62 (patch)
treef633b43d1f5fa1eabac24300ba951a322bec1fed /nova/compute
parent1fc5abe0c63c6395e77c8031ae0a0b49e251f470 (diff)
parentad3f3d0f845fddb2658c427085e426e45b88ab4b (diff)
Instance deletions in OpenStack are immediate. This can cause data to be lost accidentally.
This branch adds a new configuration flag, reclaim_instance_interval. The default of 0 results in the same behavior as before this patch: immediate deletion of the instance. Any value greater than 0 will result in the instance being powered off immediately, and later the instance will be reclaimed. New actions, restore and forceDelete, allow a previously deleted instance to be restored, or reclaimed immediately.
Diffstat (limited to 'nova/compute')
-rw-r--r--nova/compute/api.py85
-rw-r--r--nova/compute/manager.py64
-rw-r--r--nova/compute/task_states.py2
-rw-r--r--nova/compute/vm_states.py1
4 files changed, 145 insertions, 7 deletions
diff --git a/nova/compute/api.py b/nova/compute/api.py
index 76e1e7a60..1b35f061d 100644
--- a/nova/compute/api.py
+++ b/nova/compute/api.py
@@ -92,6 +92,19 @@ def _is_able_to_shutdown(instance, instance_id):
return True
+def _is_queued_delete(instance, instance_id):
+ vm_state = instance["vm_state"]
+ task_state = instance["task_state"]
+
+ if vm_state != vm_states.SOFT_DELETE:
+ LOG.warn(_("Instance %(instance_id)s is not in a 'soft delete' "
+ "state. It is currently %(vm_state)s. Action aborted.") %
+ locals())
+ return False
+
+ return True
+
+
class API(base.Base):
"""API for interacting with the compute manager."""
@@ -752,15 +765,85 @@ class API(base.Base):
{'instance_id': instance_id, 'action_str': action_str})
raise
+ @scheduler_api.reroute_compute("soft_delete")
+ def soft_delete(self, context, instance_id):
+ """Terminate an instance."""
+ LOG.debug(_("Going to try to soft delete %s"), instance_id)
+ instance = self._get_instance(context, instance_id, 'soft delete')
+
+ if not _is_able_to_shutdown(instance, instance_id):
+ return
+
+ # NOTE(jerdfelt): The compute daemon handles reclaiming instances
+ # that are in soft delete. If there is no host assigned, there is
+ # no daemon to reclaim, so delete it immediately.
+ host = instance['host']
+ if host:
+ self.update(context,
+ instance_id,
+ vm_state=vm_states.SOFT_DELETE,
+ task_state=task_states.POWERING_OFF,
+ deleted_at=utils.utcnow())
+
+ self._cast_compute_message('power_off_instance', context,
+ instance_id, host)
+ else:
+ LOG.warning(_("No host for instance %s, deleting immediately"),
+ instance_id)
+ terminate_volumes(self.db, context, instance_id)
+ self.db.instance_destroy(context, instance_id)
+
@scheduler_api.reroute_compute("delete")
def delete(self, context, instance_id):
"""Terminate an instance."""
LOG.debug(_("Going to try to terminate %s"), instance_id)
- instance = self._get_instance(context, instance_id, 'terminating')
+ instance = self._get_instance(context, instance_id, 'delete')
if not _is_able_to_shutdown(instance, instance_id):
return
+ host = instance['host']
+ if host:
+ self.update(context,
+ instance_id,
+ task_state=task_states.DELETING)
+
+ self._cast_compute_message('terminate_instance', context,
+ instance_id, host)
+ else:
+ terminate_volumes(self.db, context, instance_id)
+ self.db.instance_destroy(context, instance_id)
+
+ @scheduler_api.reroute_compute("restore")
+ def restore(self, context, instance_id):
+ """Restore a previously deleted (but not reclaimed) instance."""
+ instance = self._get_instance(context, instance_id, 'restore')
+
+ if not _is_queued_delete(instance, instance_id):
+ return
+
+ self.update(context,
+ instance_id,
+ vm_state=vm_states.ACTIVE,
+ task_state=None,
+ deleted_at=None)
+
+ host = instance['host']
+ if host:
+ self.update(context,
+ instance_id,
+ task_state=task_states.POWERING_ON)
+ self._cast_compute_message('power_on_instance', context,
+ instance_id, host)
+
+ @scheduler_api.reroute_compute("force_delete")
+ def force_delete(self, context, instance_id):
+ """Force delete a previously deleted (but not reclaimed) instance."""
+ instance = self._get_instance(context, instance_id, 'force delete')
+
+ if not _is_queued_delete(instance, instance_id):
+ return
+
self.update(context,
instance_id,
task_state=task_states.DELETING)
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index cb5d10f83..d7c23c65d 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -35,12 +35,13 @@ terminating it.
"""
+import datetime
+import functools
import os
import socket
import sys
import tempfile
import time
-import functools
from eventlet import greenthread
@@ -84,6 +85,8 @@ flags.DEFINE_integer("resize_confirm_window", 0,
" Set to 0 to disable.")
flags.DEFINE_integer('host_state_interval', 120,
'Interval in seconds for querying the host status')
+flags.DEFINE_integer('reclaim_instance_interval', 0,
+ 'Interval in seconds for reclaiming deleted instances')
LOG = logging.getLogger('nova.compute.manager')
@@ -175,7 +178,7 @@ class ComputeManager(manager.SchedulerDependentManager):
'nova-compute restart.'), locals())
self.reboot_instance(context, instance['id'])
elif drv_state == power_state.RUNNING:
- # Hyper-V and VMWareAPI drivers will raise and exception
+ # Hyper-V and VMWareAPI drivers will raise an exception
try:
net_info = self._get_instance_nw_info(context, instance)
self.driver.ensure_filtering_rules_for_instance(instance,
@@ -487,10 +490,8 @@ class ComputeManager(manager.SchedulerDependentManager):
if action_str == 'Terminating':
terminate_volumes(self.db, context, instance_id)
- @exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
- @checks_instance_lock
- def terminate_instance(self, context, instance_id):
- """Terminate an instance on this host."""
+ def _delete_instance(self, context, instance_id):
+ """Delete an instance on this host."""
self._shutdown_instance(context, instance_id, 'Terminating')
instance = self.db.instance_get(context.elevated(), instance_id)
self._instance_update(context,
@@ -508,6 +509,12 @@ class ComputeManager(manager.SchedulerDependentManager):
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@checks_instance_lock
+ def terminate_instance(self, context, instance_id):
+ """Terminate an instance on this host."""
+ self._delete_instance(context, instance_id)
+
+ @exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
+ @checks_instance_lock
def stop_instance(self, context, instance_id):
"""Stopping an instance on this host."""
self._shutdown_instance(context, instance_id, 'Stopping')
@@ -518,6 +525,30 @@ class ComputeManager(manager.SchedulerDependentManager):
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@checks_instance_lock
+ def power_off_instance(self, context, instance_id):
+ """Power off an instance on this host."""
+ instance = self.db.instance_get(context, instance_id)
+ self.driver.power_off(instance)
+ current_power_state = self._get_power_state(context, instance)
+ self._instance_update(context,
+ instance_id,
+ power_state=current_power_state,
+ task_state=None)
+
+ @exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
+ @checks_instance_lock
+ def power_on_instance(self, context, instance_id):
+ """Power on an instance on this host."""
+ instance = self.db.instance_get(context, instance_id)
+ self.driver.power_on(instance)
+ current_power_state = self._get_power_state(context, instance)
+ self._instance_update(context,
+ instance_id,
+ power_state=current_power_state,
+ task_state=None)
+
+ @exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
+ @checks_instance_lock
def rebuild_instance(self, context, instance_id, **kwargs):
"""Destroy and re-make this instance.
@@ -1676,6 +1707,13 @@ class ComputeManager(manager.SchedulerDependentManager):
LOG.warning(_("Error during power_state sync: %s"), unicode(ex))
error_list.append(ex)
+ try:
+ self._reclaim_queued_deletes(context)
+ except Exception as ex:
+ LOG.warning(_("Error during reclamation of queued deletes: %s"),
+ unicode(ex))
+ error_list.append(ex)
+
return error_list
def _report_driver_status(self):
@@ -1725,3 +1763,17 @@ class ComputeManager(manager.SchedulerDependentManager):
self._instance_update(context,
db_instance["id"],
power_state=vm_power_state)
+
+ def _reclaim_queued_deletes(self, context):
+ """Reclaim instances that are queued for deletion."""
+
+ instances = self.db.instance_get_all_by_host(context, self.host)
+
+ queue_time = datetime.timedelta(
+ seconds=FLAGS.reclaim_instance_interval)
+ curtime = utils.utcnow()
+ for instance in instances:
+ if instance['vm_state'] == vm_states.SOFT_DELETE and \
+ (curtime - instance['deleted_at']) >= queue_time:
+ LOG.info('Deleting %s' % instance['name'])
+ self._delete_instance(context, instance['id'])
diff --git a/nova/compute/task_states.py b/nova/compute/task_states.py
index e3315a542..b52140bf8 100644
--- a/nova/compute/task_states.py
+++ b/nova/compute/task_states.py
@@ -50,6 +50,8 @@ PAUSING = 'pausing'
UNPAUSING = 'unpausing'
SUSPENDING = 'suspending'
RESUMING = 'resuming'
+POWERING_OFF = 'powering-off'
+POWERING_ON = 'powering-on'
RESCUING = 'rescuing'
UNRESCUING = 'unrescuing'
diff --git a/nova/compute/vm_states.py b/nova/compute/vm_states.py
index 6f16c1f09..f219bf7f4 100644
--- a/nova/compute/vm_states.py
+++ b/nova/compute/vm_states.py
@@ -32,6 +32,7 @@ SUSPENDED = 'suspended'
RESCUED = 'rescued'
DELETED = 'deleted'
STOPPED = 'stopped'
+SOFT_DELETE = 'soft-delete'
MIGRATING = 'migrating'
RESIZING = 'resizing'