From 04ca11bd4cf43b4332c02a15edd506868031168a Mon Sep 17 00:00:00 2001
From: Naveed Massjouni <naveedm9@gmail.com>
Date: Tue, 24 Jan 2012 19:12:32 +0000
Subject: Fixing a rebuild race condition bug.

A race condition caused servers to be undeletable and stay in rebuild state.
This patch handles exceptions on the compute.manager side and sets the
instance's vm_state to ERROR (clearing its task_state) when a rebuild fails.

bug: 918958
Change-Id: I7369a63174284c5b9ed257cc129f611163d5841d
---
 nova/compute/manager.py | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

(limited to 'nova')

diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 6ca32b9b3..563ae6960 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -187,6 +187,12 @@ class ComputeManager(manager.SchedulerDependentManager):
         """Update an instance in the database using kwargs as value."""
         return self.db.instance_update(context, instance_id, kwargs)
 
+    def _set_instance_error_state(self, context, instance_uuid):
+        """Set the instance's vm_state to ERROR and clear its task_state."""
+        error_values = {'vm_state': vm_states.ERROR, 'task_state': None}
+        # _instance_update passes its keyword arguments on as the DB values.
+        self._instance_update(context, instance_uuid, **error_values)
+
     def init_host(self):
         """Initialization for a standalone compute service."""
         self.driver.init_host(host=self.host)
@@ -704,6 +710,19 @@ class ComputeManager(manager.SchedulerDependentManager):
         :param injected_files: Files to inject
         :param new_pass: password to set on rebuilt instance
         """
+        try:
+            self._rebuild_instance(context, instance_uuid, kwargs)
+        except exception.ImageNotFound:
+            msg = _("Cannot rebuild instance [%(instance_uuid)s]"
+                    ", because the given image does not exist.")
+            LOG.error(msg % locals(), context=context)
+            self._set_instance_error_state(context, instance_uuid)
+        except Exception as exc:
+            msg = _("Cannot rebuild instance [%(instance_uuid)s]: %(exc)s")
+            LOG.error(msg % locals(), context=context)
+            self._set_instance_error_state(context, instance_uuid)
+
+    def _rebuild_instance(self, context, instance_uuid, kwargs):
         context = context.elevated()
 
         LOG.audit(_("Rebuilding instance %s"), instance_uuid, context=context)
@@ -945,10 +964,7 @@ class ComputeManager(manager.SchedulerDependentManager):
                     # Catch all here because this could be anything.
                     LOG.exception(e)
                     if i == max_tries - 1:
-                        self._instance_update(context,
-                                              instance_id,
-                                              task_state=None,
-                                              vm_state=vm_states.ERROR)
+                        self._set_instance_error_state(context, instance_id)
                         # We create a new exception here so that we won't
                         # potentially reveal password information to the
                         # API caller.  The real exception is logged above
@@ -2203,10 +2219,7 @@ class ComputeManager(manager.SchedulerDependentManager):
             with utils.save_and_reraise_exception():
                 msg = _('%s. Setting instance vm_state to ERROR')
                 LOG.error(msg % error)
-                self._instance_update(context,
-                                      instance_uuid,
-                                      vm_state=vm_states.ERROR,
-                                      task_state=None)
+                self._set_instance_error_state(context, instance_uuid)
 
     def add_aggregate_host(self, context, aggregate_id, host):
         """Adds a host to a physical hypervisor pool."""
-- 
cgit