diff options
author | Wangpan <hzwangpan@corp.netease.com> | 2013-02-17 09:57:23 +0800 |
---|---|---|
committer | Wangpan <hzwangpan@corp.netease.com> | 2013-02-17 09:59:38 +0800 |
commit | 27330ac85c4353d9124b8788c727e1ce40f55ea8 (patch) | |
tree | 017f2f9c671daefdb48946b5b4eb2b2d581ed742 | |
parent | d49d504d23ebb6ae7d12e6aba37f66e7c3839ecf (diff) | |
download | nova-27330ac85c4353d9124b8788c727e1ce40f55ea8.tar.gz nova-27330ac85c4353d9124b8788c727e1ce40f55ea8.tar.xz nova-27330ac85c4353d9124b8788c727e1ce40f55ea8.zip |
Fix instance can not be deleted after soft reboot
The reason is that:
1. soft reboot will wait for instance become to 'shutdown', and then start it
2. delete operation also wait for this, and then clean up the instance
3. if soft reboot found the instance become to 'shutdown' firstly, it will
start it immediately
4. then the delete operation will go to the _wait_for_destroy loop, and the
loop may be endless
5. when we delete the instance again, because the lock was hold by the delete
operation before, this one will wait the lock and don't implement actually.
So the domain id is checked during _wait_for_destroy loop, if it changed and
the instance is still running, we need to destroy it again.
If the domain is booted after _wait_for_destroy, it may result in
unfilter_instance failed because the nwfilter is in use, so doing the same
thing as above.
Fixes Bug #1111213
Change-Id: I98dc902dd86fa828f5821465c611953e08f9f637
-rw-r--r-- | nova/tests/test_libvirt.py | 13 | ||||
-rwxr-xr-x | nova/virt/libvirt/driver.py | 71 |
2 files changed, 66 insertions, 18 deletions
diff --git a/nova/tests/test_libvirt.py b/nova/tests/test_libvirt.py index d2cd5a757..fd90e5fa9 100644 --- a/nova/tests/test_libvirt.py +++ b/nova/tests/test_libvirt.py @@ -2755,6 +2755,7 @@ class LibvirtConnTestCase(test.TestCase): def test_destroy_undefines(self): mock = self.mox.CreateMock(libvirt.virDomain) + mock.ID() mock.destroy() mock.undefineFlags(1).AndReturn(1) @@ -2764,7 +2765,7 @@ class LibvirtConnTestCase(test.TestCase): return mock def fake_get_info(instance_name): - return {'state': power_state.SHUTDOWN} + return {'state': power_state.SHUTDOWN, 'id': -1} conn = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) self.stubs.Set(conn, '_lookup_by_name', fake_lookup_by_name) @@ -2775,6 +2776,7 @@ class LibvirtConnTestCase(test.TestCase): def test_destroy_undefines_no_undefine_flags(self): mock = self.mox.CreateMock(libvirt.virDomain) + mock.ID() mock.destroy() mock.undefineFlags(1).AndRaise(libvirt.libvirtError('Err')) mock.undefine() @@ -2785,7 +2787,7 @@ class LibvirtConnTestCase(test.TestCase): return mock def fake_get_info(instance_name): - return {'state': power_state.SHUTDOWN} + return {'state': power_state.SHUTDOWN, 'id': -1} conn = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) self.stubs.Set(conn, '_lookup_by_name', fake_lookup_by_name) @@ -2796,6 +2798,7 @@ class LibvirtConnTestCase(test.TestCase): def test_destroy_undefines_no_attribute_with_managed_save(self): mock = self.mox.CreateMock(libvirt.virDomain) + mock.ID() mock.destroy() mock.undefineFlags(1).AndRaise(AttributeError()) mock.hasManagedSaveImage(0).AndReturn(True) @@ -2808,7 +2811,7 @@ class LibvirtConnTestCase(test.TestCase): return mock def fake_get_info(instance_name): - return {'state': power_state.SHUTDOWN} + return {'state': power_state.SHUTDOWN, 'id': -1} conn = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) self.stubs.Set(conn, '_lookup_by_name', fake_lookup_by_name) @@ -2819,6 +2822,7 @@ class LibvirtConnTestCase(test.TestCase): def test_destroy_undefines_no_attribute_no_managed_save(self): mock = self.mox.CreateMock(libvirt.virDomain) + mock.ID() mock.destroy() mock.undefineFlags(1).AndRaise(AttributeError()) mock.hasManagedSaveImage(0).AndRaise(AttributeError()) @@ -2830,7 +2834,7 @@ class LibvirtConnTestCase(test.TestCase): return mock def fake_get_info(instance_name): - return {'state': power_state.SHUTDOWN} + return {'state': power_state.SHUTDOWN, 'id': -1} conn = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) self.stubs.Set(conn, '_lookup_by_name', fake_lookup_by_name) @@ -2841,6 +2845,7 @@ class LibvirtConnTestCase(test.TestCase): def test_private_destroy_not_found(self): mock = self.mox.CreateMock(libvirt.virDomain) + mock.ID() mock.destroy() self.mox.ReplayAll() diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 386fe836c..7282c9e61 100755 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -493,8 +493,10 @@ class LibvirtDriver(driver.ComputeDriver): # If the instance is already terminated, we're still happy # Otherwise, destroy it + old_domid = -1 if virt_dom is not None: try: + old_domid = virt_dom.ID() virt_dom.destroy() except libvirt.libvirtError as e: is_okay = False @@ -514,14 +516,16 @@ class LibvirtDriver(driver.ComputeDriver): locals(), instance=instance) raise - def _wait_for_destroy(): + def _wait_for_destroy(expected_domid): """Called at an interval until the VM is gone.""" # NOTE(vish): If the instance disappears during the destroy # we ignore it so the cleanup can still be # attempted because we would prefer destroy to # never fail. try: - state = self.get_info(instance)['state'] + dom_info = self.get_info(instance) + state = dom_info['state'] + new_domid = dom_info['id'] except exception.NotFound: LOG.error(_("During wait destroy, instance disappeared."), instance=instance) @@ -532,8 +536,23 @@ class LibvirtDriver(driver.ComputeDriver): instance=instance) raise utils.LoopingCallDone() - timer = utils.FixedIntervalLoopingCall(_wait_for_destroy) + # NOTE(wangpan): If the instance was booted again after destroy, + # this may be a endless loop, so check the id of + # domain here, if it changed and the instance is + # still running, we should destroy it again. + # see https://bugs.launchpad.net/nova/+bug/1111213 for more details + if new_domid != expected_domid: + LOG.info(_("Instance may be started again."), + instance=instance) + kwargs['is_running'] = True + raise utils.LoopingCallDone() + + kwargs = {'is_running': False} + timer = utils.FixedIntervalLoopingCall(_wait_for_destroy, old_domid) timer.start(interval=0.5).wait() + if kwargs['is_running']: + LOG.info(_("Going to destroy instance again."), instance=instance) + self._destroy(instance) def destroy(self, instance, network_info, block_device_info=None, destroy_disks=True): @@ -575,16 +594,39 @@ class LibvirtDriver(driver.ComputeDriver): destroy_disks): self._undefine_domain(instance) self.unplug_vifs(instance, network_info) - try: - self.firewall_driver.unfilter_instance(instance, - network_info=network_info) - except libvirt.libvirtError as e: - errcode = e.get_error_code() - LOG.error(_("Error from libvirt during unfilter. " - "Code=%(errcode)s Error=%(e)s") % - locals(), instance=instance) - reason = "Error unfiltering instance." - raise exception.InstanceTerminationFailure(reason=reason) + retry = True + while retry: + try: + self.firewall_driver.unfilter_instance(instance, + network_info=network_info) + except libvirt.libvirtError as e: + try: + state = self.get_info(instance)['state'] + except exception.NotFound: + state = power_state.SHUTDOWN + + if state != power_state.SHUTDOWN: + LOG.warn(_("Instance may be still running, destroy " + "it again."), instance=instance) + self._destroy(instance) + else: + retry = False + errcode = e.get_error_code() + LOG.error(_("Error from libvirt during unfilter. " + "Code=%(errcode)s Error=%(e)s") % + locals(), instance=instance) + reason = "Error unfiltering instance." + raise exception.InstanceTerminationFailure(reason=reason) + except Exception: + retry = False + raise + else: + retry = False + + # FIXME(wangpan): if the instance is booted again here, such as the + # the soft reboot operation boot it here, it will + # become "running deleted", should we check and destroy + # it at the end of this method? # NOTE(vish): we disconnect from volumes regardless block_device_mapping = driver.block_device_info_get_mapping( @@ -2017,7 +2059,8 @@ class LibvirtDriver(driver.ComputeDriver): 'max_mem': max_mem, 'mem': mem, 'num_cpu': num_cpu, - 'cpu_time': cpu_time} + 'cpu_time': cpu_time, + 'id': virt_dom.ID()} def _create_domain(self, xml=None, domain=None, instance=None, launch_flags=0): |