From bed78bc1db64dd97af50d1f737dd2c8af63ed61d Mon Sep 17 00:00:00 2001 From: Rafi Khardalian Date: Sat, 19 Jan 2013 07:12:46 +0000 Subject: More consistent libvirt XML handling and cleanup Fixes bug 1081373 The overall goal of this effort is to make handling of libvirt XMLs more consistent. First, the on-disk XML file in the instances directory is never to be used by the libvirt driver. It will be generated via to_xml(). Second, anytime it is safe to re-define the XML using the version we programatically generate, we do so. Extended to_xml() to support a new write_to_disk argument. When true, this the resulting XML will be written to the instances directory. Renamed _get_domain_xml to _get_existing_domain_xml, as it should only be used when we need the XML for a defined domain. It will continue to fall back to using to_xml() if the defined version is not available. Hard reboots will now assume nothing about the defined state within libvirt. Rather, we generate and define the domain every time. This eliminates the possibility of the hypervisor and Nova database going out of sync (i.e. volume attached in defined XML with Nova having no knowledge). We no longer need to pass an xml argument into _hard_reboot() since it is always generated. Resume state on boot has been updated with logic similar to that of _hard_reboot. Again, after a hypervisor reboot (or detection of downed VMs on compute start), we should not assume the defined XML is valid. The state or configuration of the VM, from Nova's perspective, may have been changed while the system was down. The post_live_migration_on_destination() method used to call to_xml() without block_device_info. This would result in an on-disk XML without any volume mappings. The compute manager has been updated to pass bdi so that this XML can be generated and written correctly. The Mox stub for this function has been replaced by a stub in the fake virt driver. Change-Id: I4aa1068fa3aa54fdb7e690d46b6cf2e41bb20bc9 --- nova/compute/manager.py | 5 ++- nova/tests/compute/test_compute.py | 10 +++--- nova/tests/test_libvirt.py | 2 +- nova/virt/driver.py | 3 +- nova/virt/fake.py | 6 ++++ nova/virt/hyperv/driver.py | 2 +- nova/virt/libvirt/driver.py | 67 +++++++++++++++++++++++++------------- nova/virt/xenapi/driver.py | 3 +- 8 files changed, 65 insertions(+), 33 deletions(-) diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 86f41cd3c..6e1248019 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -2869,9 +2869,12 @@ class ComputeManager(manager.SchedulerDependentManager): self.network_api.migrate_instance_finish(context, instance, migration) network_info = self._get_instance_nw_info(context, instance) + block_device_info = self._get_instance_volume_block_device_info( + context, instance) + self.driver.post_live_migration_at_destination(context, instance, self._legacy_nw_info(network_info), - block_migration) + block_migration, block_device_info) # Restore instance state current_power_state = self._get_power_state(context, instance) instance = self._instance_update(context, instance['uuid'], diff --git a/nova/tests/compute/test_compute.py b/nova/tests/compute/test_compute.py index 596668048..e1f33ca08 100644 --- a/nova/tests/compute/test_compute.py +++ b/nova/tests/compute/test_compute.py @@ -2631,8 +2631,6 @@ class ComputeTestCase(BaseTestCase): 'setup_networks_on_host') self.mox.StubOutWithMock(self.compute.network_api, 'migrate_instance_finish') - self.mox.StubOutWithMock(self.compute.driver, - 'post_live_migration_at_destination') self.mox.StubOutWithMock(self.compute, '_get_power_state') self.mox.StubOutWithMock(self.compute, '_instance_update') @@ -2650,10 +2648,12 @@ class ComputeTestCase(BaseTestCase): self.compute.network_api.migrate_instance_finish(admin_ctxt, instance, migration) fake_net_info = [] + fake_block_dev_info = {'foo': 'bar'} self.compute.driver.post_live_migration_at_destination(admin_ctxt, - instance, - fake_net_info, - False) + instance, + fake_net_info, + False, + fake_block_dev_info) self.compute._get_power_state(admin_ctxt, instance).AndReturn( 'fake_power_state') diff --git a/nova/tests/test_libvirt.py b/nova/tests/test_libvirt.py index c4816d202..f895fde24 100644 --- a/nova/tests/test_libvirt.py +++ b/nova/tests/test_libvirt.py @@ -4581,7 +4581,7 @@ class LibvirtDriverTestCase(test.TestCase): pass def fake_to_xml(instance, network_info, image_meta=None, rescue=None, - block_device_info=None): + block_device_info=None, write_to_disk=False): return "" def fake_plug_vifs(instance, network_info): diff --git a/nova/virt/driver.py b/nova/virt/driver.py index aa0439e74..566e5230d 100644 --- a/nova/virt/driver.py +++ b/nova/virt/driver.py @@ -447,7 +447,8 @@ class ComputeDriver(object): def post_live_migration_at_destination(self, ctxt, instance_ref, network_info, - block_migration=False): + block_migration=False, + block_device_info=None): """Post operation of live migration at destination host. :param ctxt: security context diff --git a/nova/virt/fake.py b/nova/virt/fake.py index 338d1dec1..04eeded72 100644 --- a/nova/virt/fake.py +++ b/nova/virt/fake.py @@ -166,6 +166,12 @@ class FakeDriver(driver.ComputeDriver): block_device_info=None): pass + def post_live_migration_at_destination(self, context, instance, + network_info, + block_migration=False, + block_device_info=None): + pass + def power_off(self, instance): pass diff --git a/nova/virt/hyperv/driver.py b/nova/virt/hyperv/driver.py index 799ef7172..9316b2598 100644 --- a/nova/virt/hyperv/driver.py +++ b/nova/virt/hyperv/driver.py @@ -164,7 +164,7 @@ class HyperVDriver(driver.ComputeDriver): block_device_info, network_info) def post_live_migration_at_destination(self, ctxt, instance_ref, - network_info, block_migration): + network_info, block_migration, block_device_info=None): self._livemigrationops.post_live_migration_at_destination(ctxt, instance_ref, network_info, block_migration) diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 597aa39a0..a536097e3 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -427,10 +427,10 @@ class LibvirtDriver(driver.ComputeDriver): """Efficient override of base instance_exists method.""" return self._conn.numOfDomains() - def instance_exists(self, instance_id): + def instance_exists(self, instance_name): """Efficient override of base instance_exists method.""" try: - self._lookup_by_name(instance_id) + self._lookup_by_name(instance_name) return True except exception.NovaException: return False @@ -707,7 +707,8 @@ class LibvirtDriver(driver.ComputeDriver): if child.get('dev') == device: return etree.tostring(node) - def _get_domain_xml(self, instance, network_info, block_device_info=None): + def _get_existing_domain_xml(self, instance, network_info, + block_device_info=None): try: virt_dom = self._lookup_by_name(instance['name']) xml = virt_dom.XMLDesc(0) @@ -855,8 +856,7 @@ class LibvirtDriver(driver.ComputeDriver): else: LOG.warn(_("Failed to soft reboot instance."), instance=instance) - return self._hard_reboot(instance, network_info, - block_device_info=block_device_info) + return self._hard_reboot(instance, network_info, block_device_info) def _soft_reboot(self, instance): """Attempt to shutdown and restart the instance gracefully. @@ -895,8 +895,7 @@ class LibvirtDriver(driver.ComputeDriver): greenthread.sleep(1) return False - def _hard_reboot(self, instance, network_info, xml=None, - block_device_info=None): + def _hard_reboot(self, instance, network_info, block_device_info=None): """Reboot a virtual machine, given an instance reference. Performs a Libvirt reset (if supported) on the domain. @@ -909,11 +908,10 @@ class LibvirtDriver(driver.ComputeDriver): existing domain. """ - if not xml: - xml = self._get_domain_xml(instance, network_info, - block_device_info) - self._destroy(instance) + xml = self.to_xml(instance, network_info, + block_device_info=block_device_info, + write_to_disk=True) self._create_domain_and_network(xml, instance, network_info, block_device_info) @@ -958,17 +956,37 @@ class LibvirtDriver(driver.ComputeDriver): def resume(self, instance, network_info, block_device_info=None): """resume the specified instance.""" - xml = self._get_domain_xml(instance, network_info, block_device_info) + xml = self._get_existing_domain_xml(instance, network_info, + block_device_info) self._create_domain_and_network(xml, instance, network_info, block_device_info) def resume_state_on_host_boot(self, context, instance, network_info, block_device_info=None): """resume guest state when a host is booted.""" - xml = self._get_domain_xml(instance, network_info, block_device_info) + xml = self._get_existing_domain_xml(instance, network_info, + block_device_info) self._create_domain_and_network(xml, instance, network_info, block_device_info) + # Check if the instance is running already and avoid doing + # anything if it is. + if self.instance_exists(instance['name']): + domain = self._lookup_by_name(instance['name']) + state = LIBVIRT_POWER_STATE[domain.info()[0]] + + ignored_states = (power_state.RUNNING, + power_state.SUSPENDED, + power_state.PAUSED) + + if state in ignored_states: + return + + # Instance is not up and could be in an unknown state. + # Be as absolute as possible about getting it back into + # a known and running state. + self._hard_reboot(instance, network_info, block_device_info) + def rescue(self, context, instance, network_info, image_meta, rescue_password): """Loads a VM using rescue images. @@ -980,7 +998,7 @@ class LibvirtDriver(driver.ComputeDriver): """ instance_dir = libvirt_utils.get_instance_path(instance) - unrescue_xml = self._get_domain_xml(instance, network_info) + unrescue_xml = self._get_existing_domain_xml(instance, network_info) unrescue_xml_path = os.path.join(instance_dir, 'unrescue.xml') libvirt_utils.write_to_file(unrescue_xml_path, unrescue_xml) @@ -1855,11 +1873,18 @@ class LibvirtDriver(driver.ComputeDriver): return guest def to_xml(self, instance, network_info, image_meta=None, rescue=None, - block_device_info=None): + block_device_info=None, write_to_disk=False): LOG.debug(_('Starting toXML method'), instance=instance) conf = self.get_guest_config(instance, network_info, image_meta, rescue, block_device_info) xml = conf.to_xml() + + if write_to_disk: + instance_dir = os.path.join(CONF.instances_path, + instance["name"]) + xml_path = os.path.join(instance_dir, 'libvirt.xml') + libvirt_utils.write_to_file(xml_path, xml) + LOG.debug(_('Finished toXML method'), instance=instance) return xml @@ -2761,7 +2786,8 @@ class LibvirtDriver(driver.ComputeDriver): def post_live_migration_at_destination(self, ctxt, instance_ref, network_info, - block_migration): + block_migration, + block_device_info=None): """Post operation of live migration at destination host. :param ctxt: security context @@ -2774,15 +2800,10 @@ class LibvirtDriver(driver.ComputeDriver): # Define migrated instance, otherwise, suspend/destroy does not work. dom_list = self._conn.listDefinedDomains() if instance_ref["name"] not in dom_list: - instance_dir = libvirt_utils.get_instance_path(instance_ref) - xml_path = os.path.join(instance_dir, 'libvirt.xml') # In case of block migration, destination does not have # libvirt.xml - if not os.path.isfile(xml_path): - xml = self.to_xml(instance_ref, network_info=network_info) - f = open(os.path.join(instance_dir, 'libvirt.xml'), 'w+') - f.write(xml) - f.close() + self.to_xml(instance_ref, network_info, block_device_info, + write_to_disk=True) # libvirt.xml should be made by to_xml(), but libvirt # does not accept to_xml() result, since uuid is not # included in to_xml() result. diff --git a/nova/virt/xenapi/driver.py b/nova/virt/xenapi/driver.py index 0acc360e8..db6e763ae 100644 --- a/nova/virt/xenapi/driver.py +++ b/nova/virt/xenapi/driver.py @@ -499,7 +499,8 @@ class XenAPIDriver(driver.ComputeDriver): pass def post_live_migration_at_destination(self, ctxt, instance_ref, - network_info, block_migration): + network_info, block_migration, + block_device_info=None): """Post operation of live migration at destination host. :params ctxt: security context -- cgit