From cac332c39645286a11c009094a86f62d02752183 Mon Sep 17 00:00:00 2001 From: John Garbutt Date: Tue, 31 Jan 2012 14:49:04 +0000 Subject: Refactoring required for blueprint xenapi-live-migration This refactoring of the libvirt live migration code is required to enable live migration in the xenapi driver. This change ensures libvirt specific checks are performed only when the libvirt driver is enabled. The complication is that some of these checks require information to be passed between the source and destination hosts. For example, when comparing CPU flags. Change-Id: I7389f0b7f03313d7f04b907f481787dadf0716fd --- nova/virt/baremetal/driver.py | 3 - nova/virt/driver.py | 68 +++++++++++------ nova/virt/fake.py | 7 +- nova/virt/libvirt/driver.py | 169 +++++++++++++++++++++++++++++++++++++++--- nova/virt/xenapi/driver.py | 4 - 5 files changed, 205 insertions(+), 46 deletions(-) (limited to 'nova/virt') diff --git a/nova/virt/baremetal/driver.py b/nova/virt/baremetal/driver.py index 50e1529c1..5ab0fe7d1 100644 --- a/nova/virt/baremetal/driver.py +++ b/nova/virt/baremetal/driver.py @@ -716,9 +716,6 @@ class BareMetalDriver(driver.ComputeDriver): LOG.info(_('Compute_service record updated for %s ') % host) db.compute_node_update(ctxt, compute_node_ref[0]['id'], dic) - def compare_cpu(self, cpu_info): - raise NotImplementedError() - def ensure_filtering_rules_for_instance(self, instance_ref, time=None): raise NotImplementedError() diff --git a/nova/virt/driver.py b/nova/virt/driver.py index ad73b1896..d4fec38d6 100644 --- a/nova/virt/driver.py +++ b/nova/virt/driver.py @@ -251,20 +251,6 @@ class ComputeDriver(object): """Detach the disk attached to the instance""" raise NotImplementedError() - def compare_cpu(self, cpu_info): - """Compares given cpu info against host - - Before attempting to migrate a VM to this host, - compare_cpu is called to ensure that the VM will - actually run here. - - :param cpu_info: (str) JSON structure describing the source CPU. 
- :returns: None if migration is acceptable - :raises: :py:class:`~nova.exception.InvalidCPUInfo` if migration - is not acceptable. - """ - raise NotImplementedError() - def migrate_disk_and_power_off(self, context, instance, dest, instance_type, network_info): """ @@ -357,27 +343,63 @@ class ComputeDriver(object): :param host: hostname that compute manager is currently running """ - # TODO(Vek): Need to pass context in for access to auth_token raise NotImplementedError() def live_migration(self, ctxt, instance_ref, dest, - post_method, recover_method): - """Spawning live_migration operation for distributing high-load. + post_method, recover_method, block_migration=False): + """Live migration of an instance to another host. - :param ctxt: security context - :param instance_ref: + :params ctxt: security context + :params instance_ref: nova.db.sqlalchemy.models.Instance object instance object that is migrated. - :param dest: destination host - :param post_method: + :params dest: destination host + :params post_method: post operation method. expected nova.compute.manager.post_live_migration. - :param recover_method: + :params recover_method: recovery method when any exception occurs. expected nova.compute.manager.recover_live_migration. + :params block_migration: if true, migrate VM disk. + """ + raise NotImplementedError() + + def check_can_live_migrate_destination(self, ctxt, instance_ref, + block_migration=False, + disk_over_commit=False): + """Check if it is possible to execute live migration. + + This runs checks on the destination host, and then calls + back to the source host to check the results. 
+ + :param ctxt: security context + :param instance_ref: nova.db.sqlalchemy.models.Instance + :param dest: destination host + :param block_migration: if true, prepare for block migration + :param disk_over_commit: if true, allow disk over commit + """ + raise NotImplementedError() + def check_can_live_migrate_destination_cleanup(self, ctxt, + dest_check_data): + """Do required cleanup on dest host after check_can_live_migrate calls + + :param ctxt: security context + :param dest_check_data: result of check_can_live_migrate_destination + """ + raise NotImplementedError() + + def check_can_live_migrate_source(self, ctxt, instance_ref, + dest_check_data): + """Check if it is possible to execute live migration. + + This checks if the live migration can succeed, based on the + results from check_can_live_migrate_destination. + + :param ctxt: security context + :param instance_ref: nova.db.sqlalchemy.models.Instance + :param dest_check_data: result of check_can_live_migrate_destination """ - # TODO(Vek): Need to pass context in for access to auth_token raise NotImplementedError() def refresh_security_group_rules(self, security_group_id): diff --git a/nova/virt/fake.py b/nova/virt/fake.py index 85253c0fa..cf143480b 100644 --- a/nova/virt/fake.py +++ b/nova/virt/fake.py @@ -259,10 +259,6 @@ class FakeDriver(driver.ComputeDriver): LOG.info(_('Compute_service record updated for %s ') % host) db.compute_node_update(ctxt, compute_node_ref[0]['id'], dic) - def compare_cpu(self, xml): - """This method is supported only by libvirt.""" - raise NotImplementedError('This method is supported only by libvirt.') - def ensure_filtering_rules_for_instance(self, instance_ref, network_info): """This method is supported only by libvirt.""" raise NotImplementedError('This method is supported only by libvirt.') @@ -283,7 +279,8 @@ class FakeDriver(driver.ComputeDriver): def confirm_migration(self, migration, instance, network_info): return - def pre_live_migration(self, block_device_info): 
+ def pre_live_migration(self, context, instance_ref, block_device_info, + network_info): """This method is supported only by libvirt.""" return diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 39ed81b15..e01f07127 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -2202,7 +2202,118 @@ class LibvirtDriver(driver.ComputeDriver): LOG.info(_('Compute_service record updated for %s ') % host) db.compute_node_update(ctxt, compute_node_ref[0]['id'], dic) - def compare_cpu(self, cpu_info): + def check_can_live_migrate_destination(self, ctxt, instance_ref, + block_migration=False, + disk_over_commit=False): + """Check if it is possible to execute live migration. + + This runs checks on the destination host, and then calls + back to the source host to check the results. + + :param ctxt: security context + :param instance_ref: nova.db.sqlalchemy.models.Instance + :param dest: destination host + :param block_migration: if true, prepare for block migration + :param disk_over_commit: if true, allow disk over commit + """ + if block_migration: + self._assert_compute_node_has_enough_disk(ctxt, + instance_ref, + disk_over_commit) + # Compare CPU + src = instance_ref['host'] + source_cpu_info = self._get_compute_info(ctxt, src)['cpu_info'] + self._compare_cpu(source_cpu_info) + + # Create file on storage, to be checked on source host + filename = self._create_shared_storage_test_file() + + return {"filename": filename, "block_migration": block_migration} + + def check_can_live_migrate_destination_cleanup(self, ctxt, + dest_check_data): + """Do required cleanup on dest host after check_can_live_migrate calls + + :param ctxt: security context + :param dest_check_data: result of check_can_live_migrate_destination + """ + filename = dest_check_data["filename"] + self._cleanup_shared_storage_test_file(filename) + + def check_can_live_migrate_source(self, ctxt, instance_ref, + dest_check_data): + """Check if it is possible to execute live 
migration. + + This checks if the live migration can succeed, based on the + results from check_can_live_migrate_destination. + + :param ctxt: security context + :param instance_ref: nova.db.sqlalchemy.models.Instance + :param dest_check_data: result of check_can_live_migrate_destination + """ + # Checking shared storage connectivity + # if block migration, instances_paths should not be on shared storage. + dest = FLAGS.host + filename = dest_check_data["filename"] + block_migration = dest_check_data["block_migration"] + + shared = self._check_shared_storage_test_file(filename) + + if block_migration: + if shared: + reason = _("Block migration can not be used " + "with shared storage.") + raise exception.InvalidSharedStorage(reason=reason, path=dest) + + elif not shared: + reason = _("Live migration can not be used " + "without shared storage.") + raise exception.InvalidSharedStorage(reason=reason, path=dest) + + def _get_compute_info(self, context, host): + """Get compute host's information specified by key""" + compute_node_ref = db.service_get_all_compute_by_host(context, host) + return compute_node_ref[0]['compute_node'][0] + + def _assert_compute_node_has_enough_disk(self, context, instance_ref, + disk_over_commit): + """Checks if host has enough disk for block migration.""" + # Libvirt supports qcow2 disk format, which is usually compressed + # on compute nodes. + # Real disk image (compressed) may be enlarged to "virtual disk size", + # that is specified as the maximum disk size. + # (See qemu-img info path-to-disk) + # Scheduler recognizes destination host still has enough disk space + # if real disk size < available disk size + # if disk_over_commit is True, + # otherwise virtual disk size < available disk size. 
+ + # Getting total available disk of host + dest = FLAGS.host + available_gb = self._get_compute_info(context, + dest)['disk_available_least'] + available = available_gb * (1024 ** 3) + + ret = self.get_instance_disk_info(instance_ref['name']) + disk_infos = jsonutils.loads(ret) + + necessary = 0 + if disk_over_commit: + for info in disk_infos: + necessary += int(info['disk_size']) + else: + for info in disk_infos: + necessary += int(info['virt_disk_size']) + + # Check that available disk >= necessary disk + if (available - necessary) < 0: + instance_uuid = instance_ref['uuid'] + reason = _("Unable to migrate %(instance_uuid)s to %(dest)s: " + "Lack of disk(host:%(available)s " + "<= instance:%(necessary)s)") + raise exception.MigrationError(reason=reason % locals()) + + def _compare_cpu(self, cpu_info): """Checks the host cpu is compatible to a cpu given by xml. "xml" must be a part of libvirt.openReadonly().getCapabilities(). @@ -2214,9 +2325,7 @@ class LibvirtDriver(driver.ComputeDriver): :returns: None. if given cpu info is not compatible to this server, raise exception. - """ - info = jsonutils.loads(cpu_info) LOG.info(_('Instance launched has CPU info:\n%s') % cpu_info) cpu = config.LibvirtConfigCPU() @@ -2240,8 +2349,33 @@ class LibvirtDriver(driver.ComputeDriver): raise if ret <= 0: + LOG.error(reason=m % locals()) raise exception.InvalidCPUInfo(reason=m % locals()) + def _create_shared_storage_test_file(self): + """Makes tmpfile under FLAGS.instances_path.""" + dirpath = FLAGS.instances_path + fd, tmp_file = tempfile.mkstemp(dir=dirpath) + LOG.debug(_("Creating tmpfile %s to notify to other " + "compute nodes that they should mount " + "the same storage.") % tmp_file) + os.close(fd) + return os.path.basename(tmp_file) + + def _check_shared_storage_test_file(self, filename): + """Confirms existence of the tmpfile under FLAGS.instances_path. 
+ Returns False if the tmpfile does not exist.""" + tmp_file = os.path.join(FLAGS.instances_path, filename) + if not os.path.exists(tmp_file): + return False + else: + return True + + def _cleanup_shared_storage_test_file(self, filename): + """Removes the tmpfile under FLAGS.instances_path.""" + tmp_file = os.path.join(FLAGS.instances_path, filename) + os.remove(tmp_file) + def ensure_filtering_rules_for_instance(self, instance_ref, network_info, time=None): """Setting up filtering rules and waiting for its completion. @@ -2363,14 +2497,9 @@ class LibvirtDriver(driver.ComputeDriver): timer.f = wait_for_live_migration return timer.start(interval=0.5).wait() - def pre_live_migration(self, block_device_info): - """Preparation live migration. - - :params block_device_info: - It must be the result of _get_instance_volume_bdms() - at compute manager. - """ - + def pre_live_migration(self, context, instance_ref, block_device_info, + network_info): + """Preparation live migration.""" # Establishing connection to volume server. block_device_mapping = driver.block_device_info_get_mapping( block_device_info) @@ -2381,6 +2510,24 @@ class LibvirtDriver(driver.ComputeDriver): connection_info, mount_device) + # We call plug_vifs before the compute manager calls + # ensure_filtering_rules_for_instance, to ensure the bridge is set up. + # Retrying is necessary because requests arrive continuously, and + # concurrent requests to iptables make it complain. + max_retry = FLAGS.live_migration_retry_count + for cnt in range(max_retry): + try: + self.plug_vifs(instance_ref, network_info) + break + except exception.ProcessExecutionError: + if cnt == max_retry - 1: + raise + else: + LOG.warn(_("plug_vifs() failed %(cnt)d." + "Retry up to %(max_retry)d for %(hostname)s.") + % locals()) + time.sleep(1) + def pre_block_migration(self, ctxt, instance_ref, disk_info_json): """Preparation block migration. 
diff --git a/nova/virt/xenapi/driver.py b/nova/virt/xenapi/driver.py index fd2f82106..b1184c794 100644 --- a/nova/virt/xenapi/driver.py +++ b/nova/virt/xenapi/driver.py @@ -410,10 +410,6 @@ class XenAPIDriver(driver.ComputeDriver): LOG.info(_('Compute_service record updated for %s ') % host) db.compute_node_update(ctxt, compute_node_ref[0]['id'], dic) - def compare_cpu(self, xml): - """This method is supported only by libvirt.""" - raise NotImplementedError('This method is supported only by libvirt.') - def ensure_filtering_rules_for_instance(self, instance_ref, network_info): """This method is supported only libvirt.""" # NOTE(salvatore-orlando): it enforces security groups on -- cgit