summaryrefslogtreecommitdiffstats
path: root/nova/compute
diff options
context:
space:
mode:
authorChris Behrens <cbehrens@codestud.com>2013-03-11 00:20:23 -0700
committerChris Behrens <cbehrens@codestud.com>2013-03-11 19:26:49 -0700
commit652a487ed9daba9ae97f7df77ae35720322d1af3 (patch)
treed21de2ac493af0334aa7f4942e2893e141861006 /nova/compute
parentf543f347c84e7f5de2c584ca55363e4dee5b0a3d (diff)
Fix quota issues with instance deletes.
In order to keep quotas in sync as much as possible, only commit quota changes for delete when: 1) An instance's vm_state is updated to be SOFT_DELETED. 2) The DB record is marked as deleted (and the instance's vm_state is not SOFT_DELETED). If a host is down and we delete the instance in the API, this means quotas are committed within the API. Otherwise, quotas are committed on the manager side. Fixes bug 1098380. Also needed for proper testing: fixed the compute cells tests so that pseudo child cells use NoopQuotaDriver. This uncovered inconsistencies in the NoopQuotaDriver with respect to the DBQuotaDriver; those issues were fixed as well. Change-Id: Ib72de1a457f0c5056d55a5c7dd4d8d7c69708996
Diffstat (limited to 'nova/compute')
-rw-r--r--nova/compute/api.py37
-rwxr-xr-xnova/compute/manager.py145
-rw-r--r--nova/compute/rpcapi.py16
3 files changed, 123 insertions, 75 deletions
diff --git a/nova/compute/api.py b/nova/compute/api.py
index dc90748a4..5ee6d1c52 100644
--- a/nova/compute/api.py
+++ b/nova/compute/api.py
@@ -1031,15 +1031,13 @@ class API(base.Base):
instance,
**attrs)
- # Avoid double-counting the quota usage reduction
- # where delete is already in progress
- if (old['vm_state'] != vm_states.SOFT_DELETED and
- old['task_state'] not in (task_states.DELETING,
- task_states.SOFT_DELETING)):
- reservations = self._create_reservations(context,
- old,
- updated,
- project_id)
+ # NOTE(comstud): If we delete the instance locally, we'll
+ # commit the reservations here. Otherwise, the manager side
+ # will commit or rollback the reservations based on success.
+ reservations = self._create_reservations(context,
+ old,
+ updated,
+ project_id)
if not host:
# Just update database, nothing else we can do
@@ -1099,17 +1097,18 @@ class API(base.Base):
self._record_action_start(context, instance,
instance_actions.DELETE)
- cb(context, instance, bdms)
+ cb(context, instance, bdms, reservations=reservations)
except exception.ComputeHostNotFound:
pass
if not is_up:
# If compute node isn't up, just delete from DB
self._local_delete(context, instance, bdms)
- if reservations:
- QUOTAS.commit(context,
- reservations,
- project_id=project_id)
+ if reservations:
+ QUOTAS.commit(context,
+ reservations,
+ project_id=project_id)
+ reservations = None
except exception.InstanceNotFound:
# NOTE(comstud): Race condition. Instance already gone.
if reservations:
@@ -1210,16 +1209,18 @@ class API(base.Base):
LOG.debug(_('Going to try to soft delete instance'),
instance=instance)
- def soft_delete(context, instance, bdms):
- self.compute_rpcapi.soft_delete_instance(context, instance)
+ def soft_delete(context, instance, bdms, reservations=None):
+ self.compute_rpcapi.soft_delete_instance(context, instance,
+ reservations=reservations)
self._delete(context, instance, soft_delete,
task_state=task_states.SOFT_DELETING,
deleted_at=timeutils.utcnow())
def _delete_instance(self, context, instance):
- def terminate(context, instance, bdms):
- self.compute_rpcapi.terminate_instance(context, instance, bdms)
+ def terminate(context, instance, bdms, reservations=None):
+ self.compute_rpcapi.terminate_instance(context, instance, bdms,
+ reservations=reservations)
self._delete(context, instance, terminate,
task_state=task_states.DELETING)
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 338708f4e..b1c5a9a7e 100755
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -67,7 +67,6 @@ from nova.openstack.common.notifier import api as notifier
from nova.openstack.common import rpc
from nova.openstack.common import timeutils
from nova import paths
-from nova import quota
from nova import safe_utils
from nova.scheduler import rpcapi as scheduler_rpcapi
from nova import utils
@@ -178,8 +177,6 @@ CONF.import_opt('my_ip', 'nova.netconf')
CONF.import_opt('vnc_enabled', 'nova.vnc')
CONF.import_opt('enabled', 'nova.spice', group='spice')
-QUOTAS = quota.QUOTAS
-
LOG = logging.getLogger(__name__)
@@ -325,7 +322,7 @@ class ComputeVirtAPI(virtapi.VirtAPI):
class ComputeManager(manager.SchedulerDependentManager):
"""Manages the running instances from creation to destruction."""
- RPC_API_VERSION = '2.26'
+ RPC_API_VERSION = '2.27'
def __init__(self, compute_driver=None, *args, **kwargs):
"""Load configuration options and connect to the hypervisor."""
@@ -1225,35 +1222,63 @@ class ComputeManager(manager.SchedulerDependentManager):
# NOTE(vish): bdms will be deleted on instance destroy
@hooks.add_hook("delete_instance")
- def _delete_instance(self, context, instance, bdms):
- """Delete an instance on this host."""
+ def _delete_instance(self, context, instance, bdms,
+ reservations=None):
+ """Delete an instance on this host. Commit or rollback quotas
+ as necessary.
+ """
instance_uuid = instance['uuid']
- self.conductor_api.instance_info_cache_delete(context, instance)
- self._notify_about_instance_usage(context, instance, "delete.start")
- self._shutdown_instance(context, instance, bdms)
- # NOTE(vish): We have already deleted the instance, so we have
- # to ignore problems cleaning up the volumes. It would
- # be nice to let the user know somehow that the volume
- # deletion failed, but it is not acceptable to have an
- # instance that can not be deleted. Perhaps this could
- # be reworked in the future to set an instance fault
- # the first time and to only ignore the failure if the
- # instance is already in ERROR.
+
+ if context.is_admin and context.project_id != instance['project_id']:
+ project_id = instance['project_id']
+ else:
+ project_id = context.project_id
+
+ was_soft_deleted = instance['vm_state'] == vm_states.SOFT_DELETED
+ if was_soft_deleted:
+ # Instances in SOFT_DELETED vm_state have already had quotas
+ # decremented.
+ try:
+ self._quota_rollback(context, reservations,
+ project_id=project_id)
+ except Exception:
+ pass
+ reservations = None
+
try:
- self._cleanup_volumes(context, instance_uuid, bdms)
- except Exception as exc:
- LOG.warn(_("Ignoring volume cleanup failure due to %s") % exc,
- instance_uuid=instance_uuid)
- # if a delete task succeed, always update vm state and task state
- # without expecting task state to be DELETING
- instance = self._instance_update(context,
- instance_uuid,
- vm_state=vm_states.DELETED,
- task_state=None,
- terminated_at=timeutils.utcnow())
- system_meta = utils.metadata_to_dict(instance['system_metadata'])
- self.conductor_api.instance_destroy(context, instance)
+ self.conductor_api.instance_info_cache_delete(context, instance)
+ self._notify_about_instance_usage(context, instance,
+ "delete.start")
+ self._shutdown_instance(context, instance, bdms)
+ # NOTE(vish): We have already deleted the instance, so we have
+ # to ignore problems cleaning up the volumes. It
+ # would be nice to let the user know somehow that
+ # the volume deletion failed, but it is not
+ # acceptable to have an instance that can not be
+ # deleted. Perhaps this could be reworked in the
+ # future to set an instance fault the first time
+ # and to only ignore the failure if the instance
+ # is already in ERROR.
+ try:
+ self._cleanup_volumes(context, instance_uuid, bdms)
+ except Exception as exc:
+ err_str = _("Ignoring volume cleanup failure due to %s")
+ LOG.warn(err_str % exc, instance=instance)
+ # if a delete task succeed, always update vm state and task
+ # state without expecting task state to be DELETING
+ instance = self._instance_update(context,
+ instance_uuid,
+ vm_state=vm_states.DELETED,
+ task_state=None,
+ terminated_at=timeutils.utcnow())
+ system_meta = utils.metadata_to_dict(instance['system_metadata'])
+ self.conductor_api.instance_destroy(context, instance)
+ except Exception:
+ with excutils.save_and_reraise_exception():
+ self._quota_rollback(context, reservations,
+ project_id=project_id)
+ self._quota_commit(context, reservations, project_id=project_id)
# ensure block device mappings are not leaked
self.conductor_api.block_device_mapping_destroy(context, bdms)
@@ -1267,7 +1292,8 @@ class ComputeManager(manager.SchedulerDependentManager):
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@wrap_instance_event
@wrap_instance_fault
- def terminate_instance(self, context, instance, bdms=None):
+ def terminate_instance(self, context, instance, bdms=None,
+ reservations=None):
"""Terminate an instance on this host."""
# Note(eglynn): we do not decorate this action with reverts_task_state
# because a failure during termination should leave the task state as
@@ -1275,7 +1301,6 @@ class ComputeManager(manager.SchedulerDependentManager):
# attempt should not result in a further decrement of the quota_usages
# in_use count (see bug 1046236).
- elevated = context.elevated()
# NOTE(danms): remove this compatibility in the future
if not bdms:
bdms = self._get_instance_volume_bdms(context, instance)
@@ -1283,7 +1308,8 @@ class ComputeManager(manager.SchedulerDependentManager):
@lockutils.synchronized(instance['uuid'], 'nova-')
def do_terminate_instance(instance, bdms):
try:
- self._delete_instance(context, instance, bdms)
+ self._delete_instance(context, instance, bdms,
+ reservations=reservations)
except exception.InstanceTerminationFailure as error:
msg = _('%s. Setting instance vm_state to ERROR')
LOG.error(msg % error, instance=instance)
@@ -1337,22 +1363,34 @@ class ComputeManager(manager.SchedulerDependentManager):
@reverts_task_state
@wrap_instance_event
@wrap_instance_fault
- def soft_delete_instance(self, context, instance):
+ def soft_delete_instance(self, context, instance, reservations=None):
"""Soft delete an instance on this host."""
- self._notify_about_instance_usage(context, instance,
- "soft_delete.start")
+
+ if context.is_admin and context.project_id != instance['project_id']:
+ project_id = instance['project_id']
+ else:
+ project_id = context.project_id
+
try:
- self.driver.soft_delete(instance)
- except NotImplementedError:
- # Fallback to just powering off the instance if the hypervisor
- # doesn't implement the soft_delete method
- self.driver.power_off(instance)
- current_power_state = self._get_power_state(context, instance)
- instance = self._instance_update(context, instance['uuid'],
- power_state=current_power_state,
- vm_state=vm_states.SOFT_DELETED,
- expected_task_state=task_states.SOFT_DELETING,
- task_state=None)
+ self._notify_about_instance_usage(context, instance,
+ "soft_delete.start")
+ try:
+ self.driver.soft_delete(instance)
+ except NotImplementedError:
+ # Fallback to just powering off the instance if the
+ # hypervisor doesn't implement the soft_delete method
+ self.driver.power_off(instance)
+ current_power_state = self._get_power_state(context, instance)
+ instance = self._instance_update(context, instance['uuid'],
+ power_state=current_power_state,
+ vm_state=vm_states.SOFT_DELETED,
+ expected_task_state=task_states.SOFT_DELETING,
+ task_state=None)
+ except Exception:
+ with excutils.save_and_reraise_exception():
+ self._quota_rollback(context, reservations,
+ project_id=project_id)
+ self._quota_commit(context, reservations, project_id=project_id)
self._notify_about_instance_usage(context, instance, "soft_delete.end")
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@@ -2080,13 +2118,15 @@ class ComputeManager(manager.SchedulerDependentManager):
self._quota_commit(context, reservations)
- def _quota_commit(self, context, reservations):
+ def _quota_commit(self, context, reservations, project_id=None):
if reservations:
- self.conductor_api.quota_commit(context, reservations)
+ self.conductor_api.quota_commit(context, reservations,
+ project_id=project_id)
- def _quota_rollback(self, context, reservations):
+ def _quota_rollback(self, context, reservations, project_id=None):
if reservations:
- self.conductor_api.quota_rollback(context, reservations)
+ self.conductor_api.quota_rollback(context, reservations,
+ project_id=project_id)
def _prep_resize(self, context, image, instance, instance_type,
reservations, request_spec, filter_properties, node):
@@ -3709,6 +3749,9 @@ class ComputeManager(manager.SchedulerDependentManager):
bdms = capi.block_device_mapping_get_all_by_instance(
context, instance)
LOG.info(_('Reclaiming deleted instance'), instance=instance)
+ # NOTE(comstud): Quotas were already accounted for when
+ # the instance was soft deleted, so there's no need to
+ # pass reservations here.
self._delete_instance(context, instance, bdms)
@manager.periodic_task
diff --git a/nova/compute/rpcapi.py b/nova/compute/rpcapi.py
index 914c45471..62c1ed9a0 100644
--- a/nova/compute/rpcapi.py
+++ b/nova/compute/rpcapi.py
@@ -163,6 +163,8 @@ class ComputeAPI(nova.openstack.common.rpc.proxy.RpcProxy):
2.25 - Add attach_interface() and detach_interface()
2.26 - Add validate_console_token to ensure the service connects to
vnc on the correct port
+ 2.27 - Adds 'reservations' to terminate_instance() and
+ soft_delete_instance()
'''
#
@@ -588,13 +590,14 @@ class ComputeAPI(nova.openstack.common.rpc.proxy.RpcProxy):
instance=instance_p),
topic=_compute_topic(self.topic, ctxt, None, instance))
- def terminate_instance(self, ctxt, instance, bdms):
+ def terminate_instance(self, ctxt, instance, bdms, reservations=None):
instance_p = jsonutils.to_primitive(instance)
bdms_p = jsonutils.to_primitive(bdms)
self.cast(ctxt, self.make_msg('terminate_instance',
- instance=instance_p, bdms=bdms_p),
+ instance=instance_p, bdms=bdms_p,
+ reservations=reservations),
topic=_compute_topic(self.topic, ctxt, None, instance),
- version='2.4')
+ version='2.27')
def unpause_instance(self, ctxt, instance):
instance_p = jsonutils.to_primitive(instance)
@@ -615,11 +618,12 @@ class ComputeAPI(nova.openstack.common.rpc.proxy.RpcProxy):
def publish_service_capabilities(self, ctxt):
self.fanout_cast(ctxt, self.make_msg('publish_service_capabilities'))
- def soft_delete_instance(self, ctxt, instance):
+ def soft_delete_instance(self, ctxt, instance, reservations=None):
instance_p = jsonutils.to_primitive(instance)
self.cast(ctxt, self.make_msg('soft_delete_instance',
- instance=instance_p),
- topic=_compute_topic(self.topic, ctxt, None, instance))
+ instance=instance_p, reservations=reservations),
+ topic=_compute_topic(self.topic, ctxt, None, instance),
+ version='2.27')
def restore_instance(self, ctxt, instance):
instance_p = jsonutils.to_primitive(instance)