From 9fffd28cee6669089159047b2bbb5e0539ab4299 Mon Sep 17 00:00:00 2001 From: Rafi Khardalian Date: Tue, 14 Aug 2012 13:42:22 +0000 Subject: Restore libvirt block storage connections on reboot. Fixes bug 1036902. There are a number of cases where block storage connections are not properly restored, impacting libvirt in particular. The most common case is a VM which has block storage attached via iSCSI, whereby the physical system is rebooted. When the system comes back up and starts nova-compute, the iSCSI connections are NOT recreated for the instances slated to be resumed (assuming resume_guests_state_on_host_boot is set). The patch changes the compute manager to pass block_device_info via driver.reboot() and driver.resume_state_on_host_boot(). The fix is actually only present in the libvirt driver. However, all the other drivers were updated to accept the additional, optional function arg. With the changes in place, iSCSI connections for libvirt are re-established after a hypervisor reboot with resume_guests_state_on_host_boot=True and on every hard_reboot. The latter is intended so that users have a last ditch option for recovering their VMs without administrative involvement. 
Change-Id: Idf5d53f21991a359bec6ce26ae9fe3bd61800ce3 --- nova/compute/manager.py | 19 +++++++++++++++---- nova/virt/driver.py | 6 ++++-- nova/virt/fake.py | 6 ++++-- nova/virt/hyperv/driver.py | 3 ++- nova/virt/libvirt/driver.py | 40 +++++++++++++++++++++++++++++++++------- nova/virt/powervm/driver.py | 3 ++- nova/virt/vmwareapi/driver.py | 3 ++- nova/virt/xenapi/driver.py | 3 ++- 8 files changed, 64 insertions(+), 19 deletions(-) diff --git a/nova/compute/manager.py b/nova/compute/manager.py index cb455c1c9..39107350d 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -329,10 +329,17 @@ class ComputeManager(manager.SchedulerDependentManager): LOG.info( _('Rebooting instance after nova-compute restart.'), locals(), instance=instance) + + block_device_info = \ + self._get_instance_volume_block_device_info( + context, instance['uuid']) + try: - self.driver.resume_state_on_host_boot(context, - instance, - self._legacy_nw_info(net_info)) + self.driver.resume_state_on_host_boot( + context, + instance, + self._legacy_nw_info(net_info), + block_device_info) except NotImplementedError: LOG.warning(_('Hypervisor driver does not support ' 'resume guests'), instance=instance) @@ -1118,9 +1125,13 @@ class ComputeManager(manager.SchedulerDependentManager): context=context, instance=instance) network_info = self._get_instance_nw_info(context, instance) + + block_device_info = self._get_instance_volume_block_device_info( + context, instance['uuid']) + try: self.driver.reboot(instance, self._legacy_nw_info(network_info), - reboot_type) + reboot_type, block_device_info) except Exception, exc: LOG.error(_('Cannot reboot instance: %(exc)s'), locals(), context=context, instance=instance) diff --git a/nova/virt/driver.py b/nova/virt/driver.py index 53fc88329..dd3646bc9 100644 --- a/nova/virt/driver.py +++ b/nova/virt/driver.py @@ -192,7 +192,8 @@ class ComputeDriver(object): # TODO(Vek): Need to pass context in for access to auth_token raise NotImplementedError() - 
def reboot(self, instance, network_info, reboot_type): + def reboot(self, instance, network_info, reboot_type, + block_device_info=None): """Reboot the specified instance. :param instance: Instance object as returned by DB layer. @@ -301,7 +302,8 @@ class ComputeDriver(object): # TODO(Vek): Need to pass context in for access to auth_token raise NotImplementedError() - def resume_state_on_host_boot(self, context, instance, network_info): + def resume_state_on_host_boot(self, context, instance, network_info, + block_device_info=None): """resume guest state when a host is booted""" raise NotImplementedError() diff --git a/nova/virt/fake.py b/nova/virt/fake.py index ebc81d13f..dd20b0b15 100644 --- a/nova/virt/fake.py +++ b/nova/virt/fake.py @@ -92,7 +92,8 @@ class FakeDriver(driver.ComputeDriver): if not instance['name'] in self.instances: raise exception.InstanceNotRunning() - def reboot(self, instance, network_info, reboot_type): + def reboot(self, instance, network_info, reboot_type, + block_device_info=None): pass @staticmethod @@ -105,7 +106,8 @@ class FakeDriver(driver.ComputeDriver): def inject_file(self, instance, b64_path, b64_contents): pass - def resume_state_on_host_boot(self, context, instance, network_info): + def resume_state_on_host_boot(self, context, instance, network_info, + block_device_info=None): pass def rescue(self, context, instance, network_info, image_meta, diff --git a/nova/virt/hyperv/driver.py b/nova/virt/hyperv/driver.py index 0a29c9426..85ff50e95 100644 --- a/nova/virt/hyperv/driver.py +++ b/nova/virt/hyperv/driver.py @@ -95,7 +95,8 @@ class HyperVDriver(driver.ComputeDriver): self._vmops.spawn(context, instance, image_meta, network_info, block_device_info) - def reboot(self, instance, network_info, reboot_type): + def reboot(self, instance, network_info, reboot_type, + block_device_info=None): self._vmops.reboot(instance, network_info, reboot_type) def destroy(self, instance, network_info=None, cleanup=True): diff --git 
a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 8d3a36405..1e2706841 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -810,7 +810,8 @@ class LibvirtDriver(driver.ComputeDriver): image_file) @exception.wrap_exception() - def reboot(self, instance, network_info, reboot_type='SOFT'): + def reboot(self, instance, network_info, reboot_type='SOFT', + block_device_info=None): """Reboot a virtual machine, given an instance reference.""" if reboot_type == 'SOFT': # NOTE(vish): This will attempt to do a graceful shutdown/restart. @@ -821,7 +822,7 @@ class LibvirtDriver(driver.ComputeDriver): else: LOG.warn(_("Failed to soft reboot instance."), instance=instance) - return self._hard_reboot(instance) + return self._hard_reboot(instance, block_device_info=block_device_info) def _soft_reboot(self, instance): """Attempt to shutdown and restart the instance gracefully. @@ -858,7 +859,7 @@ class LibvirtDriver(driver.ComputeDriver): greenthread.sleep(1) return False - def _hard_reboot(self, instance, xml=None): + def _hard_reboot(self, instance, xml=None, block_device_info=None): """Reboot a virtual machine, given an instance reference. Performs a Libvirt reset (if supported) on the domain. @@ -871,6 +872,16 @@ class LibvirtDriver(driver.ComputeDriver): existing domain. """ + block_device_mapping = driver.block_device_info_get_mapping( + block_device_info) + + for vol in block_device_mapping: + connection_info = vol['connection_info'] + mount_device = vol['mount_device'].rpartition("/")[2] + self.volume_driver_method('connect_volume', + connection_info, + mount_device) + virt_dom = self._lookup_by_name(instance['name']) # NOTE(itoumsn): Use XML delived from the running instance. 
if not xml: @@ -934,11 +945,13 @@ class LibvirtDriver(driver.ComputeDriver): self._create_domain(domain=dom) @exception.wrap_exception() - def resume_state_on_host_boot(self, context, instance, network_info): + def resume_state_on_host_boot(self, context, instance, network_info, + block_device_info=None): """resume guest state when a host is booted""" virt_dom = self._lookup_by_name(instance['name']) xml = virt_dom.XMLDesc(0) - self._create_domain_and_network(xml, instance, network_info) + self._create_domain_and_network(xml, instance, network_info, + block_device_info) @exception.wrap_exception() def rescue(self, context, instance, network_info, image_meta, @@ -1017,7 +1030,8 @@ class LibvirtDriver(driver.ComputeDriver): block_device_info=block_device_info, files=injected_files, admin_pass=admin_password) - self._create_domain_and_network(xml, instance, network_info) + self._create_domain_and_network(xml, instance, network_info, + block_device_info) LOG.debug(_("Instance is running"), instance=instance) def _wait_for_boot(): @@ -1830,8 +1844,20 @@ class LibvirtDriver(driver.ComputeDriver): domain.createWithFlags(launch_flags) return domain - def _create_domain_and_network(self, xml, instance, network_info): + def _create_domain_and_network(self, xml, instance, network_info, + block_device_info=None): + """Do required network setup and create domain.""" + block_device_mapping = driver.block_device_info_get_mapping( + block_device_info) + + for vol in block_device_mapping: + connection_info = vol['connection_info'] + mount_device = vol['mount_device'].rpartition("/")[2] + self.volume_driver_method('connect_volume', + connection_info, + mount_device) + self.plug_vifs(instance, network_info) self.firewall_driver.setup_basic_filtering(instance, network_info) self.firewall_driver.prepare_instance_filter(instance, network_info) diff --git a/nova/virt/powervm/driver.py b/nova/virt/powervm/driver.py index 66fd8929e..1b3eba415 100644 --- a/nova/virt/powervm/driver.py +++ 
b/nova/virt/powervm/driver.py @@ -138,7 +138,8 @@ class PowerVMDriver(driver.ComputeDriver): """Destroy (shutdown and delete) the specified instance.""" self._powervm.destroy(instance['name']) - def reboot(self, instance, network_info, reboot_type): + def reboot(self, instance, network_info, reboot_type, + block_device_info=None): """Reboot the specified instance. :param instance: Instance object as returned by DB layer. diff --git a/nova/virt/vmwareapi/driver.py b/nova/virt/vmwareapi/driver.py index 112de9a93..947bd3422 100644 --- a/nova/virt/vmwareapi/driver.py +++ b/nova/virt/vmwareapi/driver.py @@ -135,7 +135,8 @@ class VMWareESXDriver(driver.ComputeDriver): """Create snapshot from a running VM instance.""" self._vmops.snapshot(context, instance, name) - def reboot(self, instance, network_info, reboot_type): + def reboot(self, instance, network_info, reboot_type, + block_device_info=None): """Reboot VM instance.""" self._vmops.reboot(instance, network_info) diff --git a/nova/virt/xenapi/driver.py b/nova/virt/xenapi/driver.py index 2f472fc7b..3709c13af 100644 --- a/nova/virt/xenapi/driver.py +++ b/nova/virt/xenapi/driver.py @@ -197,7 +197,8 @@ class XenAPIDriver(driver.ComputeDriver): """ Create snapshot from a running VM instance """ self._vmops.snapshot(context, instance, image_id) - def reboot(self, instance, network_info, reboot_type): + def reboot(self, instance, network_info, reboot_type, + block_device_info=None): """Reboot VM instance""" self._vmops.reboot(instance, reboot_type) -- cgit