-rwxr-xr-x | nova/compute/manager.py | 236 |
1 file changed, 123 insertions(+), 113 deletions(-)
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index e6b811963..fd45fd3fc 100755
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -3356,9 +3356,6 @@ class ComputeManager(manager.SchedulerDependentManager):
         number of virtual machines known by the database, we proceed in a lazy
         loop, one database record at a time, checking if the hypervisor has the
         same power state as is in the database.
-
-        If the instance is not found on the hypervisor, but is in the database,
-        then a stop() API will be called on the instance.
         """
         db_instances = self.conductor_api.instance_get_all_by_host(context,
                                                                    self.host)
@@ -3383,118 +3380,131 @@ class ComputeManager(manager.SchedulerDependentManager):
                 vm_power_state = power_state.NOSTATE
             # Note(maoy): the above get_info call might take a long time,
             # for example, because of a broken libvirt driver.
-            # We re-query the DB to get the latest instance info to minimize
-            # (not eliminate) race condition.
-            u = self.conductor_api.instance_get_by_uuid(context,
-                                                        db_instance['uuid'])
-            db_power_state = u["power_state"]
-            vm_state = u['vm_state']
-            if self.host != u['host']:
-                # on the sending end of nova-compute _sync_power_state
-                # may have yielded to the greenthread performing a live
-                # migration; this in turn has changed the resident-host
-                # for the VM; However, the instance is still active, it
-                # is just in the process of migrating to another host.
-                # This implies that the compute source must relinquish
-                # control to the compute destination.
-                LOG.info(_("During the sync_power process the "
-                           "instance has moved from "
-                           "host %(src)s to host %(dst)s") %
-                         {'src': self.host,
-                          'dst': u['host']},
+            self._sync_instance_power_state(context,
+                                            db_instance,
+                                            vm_power_state)
+
+    def _sync_instance_power_state(self, context, db_instance, vm_power_state):
+        """Align instance power state between the database and hypervisor.
+
+        If the instance is not found on the hypervisor, but is in the database,
+        then a stop() API will be called on the instance."""
+
+        # We re-query the DB to get the latest instance info to minimize
+        # (not eliminate) race condition.
+        u = self.conductor_api.instance_get_by_uuid(context,
+                                                    db_instance['uuid'])
+        db_power_state = u["power_state"]
+        vm_state = u['vm_state']
+
+        if self.host != u['host']:
+            # on the sending end of nova-compute _sync_power_state
+            # may have yielded to the greenthread performing a live
+            # migration; this in turn has changed the resident-host
+            # for the VM; However, the instance is still active, it
+            # is just in the process of migrating to another host.
+            # This implies that the compute source must relinquish
+            # control to the compute destination.
+            LOG.info(_("During the sync_power process the "
+                       "instance has moved from "
+                       "host %(src)s to host %(dst)s") %
+                     {'src': self.host,
+                      'dst': u['host']},
+                     instance=db_instance)
+            return
+        elif u['task_state'] is not None:
+            # on the receiving end of nova-compute, it could happen
+            # that the DB instance already report the new resident
+            # but the actual VM has not showed up on the hypervisor
+            # yet. In this case, let's allow the loop to continue
+            # and run the state sync in a later round
+            LOG.info(_("During sync_power_state the instance has a "
+                       "pending task. Skip."), instance=db_instance)
+            return
+
+        if vm_power_state != db_power_state:
+            # power_state is always updated from hypervisor to db
+            self._instance_update(context,
+                                  db_instance['uuid'],
+                                  power_state=vm_power_state)
+            db_power_state = vm_power_state
+
+        # Note(maoy): Now resolve the discrepancy between vm_state and
+        # vm_power_state. We go through all possible vm_states.
+        if vm_state in (vm_states.BUILDING,
+                        vm_states.RESCUED,
+                        vm_states.RESIZED,
+                        vm_states.SUSPENDED,
+                        vm_states.PAUSED,
+                        vm_states.ERROR):
+            # TODO(maoy): we ignore these vm_state for now.
+            pass
+        elif vm_state == vm_states.ACTIVE:
+            # The only rational power state should be RUNNING
+            if vm_power_state in (power_state.SHUTDOWN,
+                                  power_state.CRASHED):
+                LOG.warn(_("Instance shutdown by itself. Calling "
+                           "the stop API."), instance=db_instance)
+                try:
+                    # Note(maoy): here we call the API instead of
+                    # brutally updating the vm_state in the database
+                    # to allow all the hooks and checks to be performed.
+                    self.conductor_api.compute_stop(context, db_instance)
+                except Exception:
+                    # Note(maoy): there is no need to propagate the error
+                    # because the same power_state will be retrieved next
+                    # time and retried.
+                    # For example, there might be another task scheduled.
+                    LOG.exception(_("error during stop() in "
+                                    "sync_power_state."),
+                                  instance=db_instance)
+            elif vm_power_state == power_state.SUSPENDED:
+                LOG.warn(_("Instance is suspended unexpectedly. Calling "
+                           "the stop API."), instance=db_instance)
+                try:
+                    self.conductor_api.compute_stop(context, db_instance)
+                except Exception:
+                    LOG.exception(_("error during stop() in "
+                                    "sync_power_state."),
+                                  instance=db_instance)
+            elif vm_power_state == power_state.PAUSED:
+                # Note(maoy): a VM may get into the paused state not only
+                # because the user request via API calls, but also
+                # due to (temporary) external instrumentations.
+                # Before the virt layer can reliably report the reason,
+                # we simply ignore the state discrepancy. In many cases,
+                # the VM state will go back to running after the external
+                # instrumentation is done. See bug 1097806 for details.
+                LOG.warn(_("Instance is paused unexpectedly. Ignore."),
                          instance=db_instance)
-                continue
-            elif u['task_state'] is not None:
-                # on the receiving end of nova-compute, it could happen
-                # that the DB instance already report the new resident
-                # but the actual VM has not showed up on the hypervisor
-                # yet. In this case, let's allow the loop to continue
-                # and run the state sync in a later round
-                LOG.info(_("During sync_power_state the instance has a "
-                           "pending task. Skip."), instance=db_instance)
-                continue
-            if vm_power_state != db_power_state:
-                # power_state is always updated from hypervisor to db
-                self._instance_update(context,
-                                      db_instance['uuid'],
-                                      power_state=vm_power_state)
-                db_power_state = vm_power_state
-            # Note(maoy): Now resolve the discrepancy between vm_state and
-            # vm_power_state. We go through all possible vm_states.
-            if vm_state in (vm_states.BUILDING,
-                            vm_states.RESCUED,
-                            vm_states.RESIZED,
-                            vm_states.SUSPENDED,
-                            vm_states.PAUSED,
-                            vm_states.ERROR):
-                # TODO(maoy): we ignore these vm_state for now.
-                pass
-            elif vm_state == vm_states.ACTIVE:
-                # The only rational power state should be RUNNING
-                if vm_power_state in (power_state.SHUTDOWN,
+            elif vm_power_state == power_state.NOSTATE:
+                # Occasionally, depending on the status of the hypervisor,
+                # which could be restarting for example, an instance may
+                # not be found. Therefore just log the condition.
+                LOG.warn(_("Instance is unexpectedly not found. Ignore."),
+                         instance=db_instance)
+        elif vm_state == vm_states.STOPPED:
+            if vm_power_state not in (power_state.NOSTATE,
+                                      power_state.SHUTDOWN,
                                       power_state.CRASHED):
-                    LOG.warn(_("Instance shutdown by itself. Calling "
-                               "the stop API."), instance=db_instance)
-                    try:
-                        # Note(maoy): here we call the API instead of
-                        # brutally updating the vm_state in the database
-                        # to allow all the hooks and checks to be performed.
-                        self.conductor_api.compute_stop(context, db_instance)
-                    except Exception:
-                        # Note(maoy): there is no need to propagate the error
-                        # because the same power_state will be retrieved next
-                        # time and retried.
-                        # For example, there might be another task scheduled.
-                        LOG.exception(_("error during stop() in "
-                                        "sync_power_state."),
-                                      instance=db_instance)
-                elif vm_power_state == power_state.SUSPENDED:
-                    LOG.warn(_("Instance is suspended unexpectedly. Calling "
-                               "the stop API."), instance=db_instance)
-                    try:
-                        self.conductor_api.compute_stop(context, db_instance)
-                    except Exception:
-                        LOG.exception(_("error during stop() in "
-                                        "sync_power_state."),
-                                      instance=db_instance)
-                elif vm_power_state == power_state.PAUSED:
-                    # Note(maoy): a VM may get into the paused state not only
-                    # because the user request via API calls, but also
-                    # due to (temporary) external instrumentations.
-                    # Before the virt layer can reliably report the reason,
-                    # we simply ignore the state discrepancy. In many cases,
-                    # the VM state will go back to running after the external
-                    # instrumentation is done. See bug 1097806 for details.
-                    LOG.warn(_("Instance is paused unexpectedly. Ignore."),
-                             instance=db_instance)
-                elif vm_power_state == power_state.NOSTATE:
-                    # Occasionally, depending on the status of the hypervisor,
-                    # which could be restarting for example, an instance may
-                    # not be found. Therefore just log the condition.
-                    LOG.warn(_("Instance is unexpectedly not found. Ignore."),
-                             instance=db_instance)
-            elif vm_state == vm_states.STOPPED:
-                if vm_power_state not in (power_state.NOSTATE,
-                                          power_state.SHUTDOWN,
-                                          power_state.CRASHED):
-                    LOG.warn(_("Instance is not stopped. Calling "
-                               "the stop API."), instance=db_instance)
-                    try:
-                        # Note(maoy): this assumes that the stop API is
-                        # idempotent.
-                        self.conductor_api.compute_stop(context, db_instance)
-                    except Exception:
-                        LOG.exception(_("error during stop() in "
-                                        "sync_power_state."),
-                                      instance=db_instance)
-            elif vm_state in (vm_states.SOFT_DELETED,
-                              vm_states.DELETED):
-                if vm_power_state not in (power_state.NOSTATE,
-                                          power_state.SHUTDOWN):
-                    # Note(maoy): this should be taken care of periodically in
-                    # _cleanup_running_deleted_instances().
-                    LOG.warn(_("Instance is not (soft-)deleted."),
-                             instance=db_instance)
+                LOG.warn(_("Instance is not stopped. Calling "
+                           "the stop API."), instance=db_instance)
+                try:
+                    # Note(maoy): this assumes that the stop API is
+                    # idempotent.
+                    self.conductor_api.compute_stop(context, db_instance)
+                except Exception:
+                    LOG.exception(_("error during stop() in "
+                                    "sync_power_state."),
+                                  instance=db_instance)
+        elif vm_state in (vm_states.SOFT_DELETED,
+                          vm_states.DELETED):
+            if vm_power_state not in (power_state.NOSTATE,
+                                      power_state.SHUTDOWN):
+                # Note(maoy): this should be taken care of periodically in
+                # _cleanup_running_deleted_instances().
+                LOG.warn(_("Instance is not (soft-)deleted."),
+                         instance=db_instance)
 
     @manager.periodic_task
     def _reclaim_queued_deletes(self, context):
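The substance of the change is that the per-instance reconciliation logic moves out of the _sync_power_states loop into its own _sync_instance_power_state method (with continue becoming return), so a single instance can now be synced, and tested, without driving the whole periodic task. Below is a minimal sketch of such a unit test, assuming the standalone mock library (or unittest.mock) and an importable nova tree; the __new__ construction trick, the fake values, and the test name are illustrative only and are not part of the commit.

import mock

from nova.compute import manager
from nova.compute import power_state
from nova.compute import vm_states


def test_active_instance_found_shutdown_calls_stop():
    # Build a ComputeManager without running __init__, so no virt driver,
    # RPC, or DB setup is needed; stub only what the helper touches.
    compute = manager.ComputeManager.__new__(manager.ComputeManager)
    compute.host = 'host-1'
    compute.conductor_api = mock.Mock()
    compute._instance_update = mock.Mock()

    db_instance = {'uuid': 'fake-uuid'}
    compute.conductor_api.instance_get_by_uuid.return_value = {
        'uuid': 'fake-uuid',
        'host': 'host-1',        # same host, so not a live migration
        'task_state': None,      # no pending task
        'power_state': power_state.RUNNING,
        'vm_state': vm_states.ACTIVE,
    }

    # Hypervisor reports SHUTDOWN while the DB still says RUNNING/ACTIVE.
    compute._sync_instance_power_state(mock.sentinel.ctxt, db_instance,
                                       power_state.SHUTDOWN)

    # The DB power_state is refreshed from the hypervisor value...
    compute._instance_update.assert_called_once_with(
        mock.sentinel.ctxt, 'fake-uuid', power_state=power_state.SHUTDOWN)
    # ...and the ACTIVE-but-shut-down instance is stopped through the API,
    # so the normal stop hooks and checks still run.
    compute.conductor_api.compute_stop.assert_called_once_with(
        mock.sentinel.ctxt, db_instance)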