From f5583a7840947c9eb5b0fb766daecc4e99dd95fe Mon Sep 17 00:00:00 2001
From: Rick Harris
Date: Mon, 9 Jul 2012 21:49:03 +0000
Subject: Remove VDI chain limit for migrations.

The strategy for removing the limit is to refactor migrations so that
they work nearly identically to snapshots, meaning sequence-numbered
VHDs are rsynced over into a staging area and then imported into the
SR using the `import_vhds` function.

Change-Id: Ibf5c82c52ae7d505ea9e54d64fcc8b8fdce4d05d
---
 nova/tests/xenapi/stubs.py                         |    2 +-
 nova/virt/xenapi/vm_utils.py                       |   38 +++-
 nova/virt/xenapi/vmops.py                          |  219 +++++++++------
 plugins/xenserver/xenapi/etc/xapi.d/plugins/glance |    2 +-
 .../xenserver/xenapi/etc/xapi.d/plugins/migration  |  101 +++-------
 .../xenserver/xenapi/etc/xapi.d/plugins/utils.py   |    6 +-
 6 files changed, 162 insertions(+), 206 deletions(-)
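One convention worth spelling out before the diffs: inside the staging
area (/images/instance<uuid>/ on the destination), each VHD of a chain
is named by sequence number, with 0.vhd as the leaf (the active COW)
and 1.vhd, 2.vhd, ... walking up the parents to the base copy. The
sketch below shows how chain order can be recovered from the filenames
alone; the helper name is invented for illustration and is not part of
this patch:

    import os

    def staged_chain(staging_path):
        # Illustration only: recover leaf-to-base order purely from the
        # sequence-numbered filenames (0.vhd = leaf/COW, N.vhd = base).
        names = [n for n in os.listdir(staging_path) if n.endswith('.vhd')]
        names.sort(key=lambda n: int(n.split('.')[0]))
        return [os.path.join(staging_path, n) for n in names]

This is why disk_info no longer needs to carry uuids between hosts:
the chain ordering is encoded entirely in the staged filenames.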
diff --git a/nova/tests/xenapi/stubs.py b/nova/tests/xenapi/stubs.py
index ec944d84d..483356424 100644
--- a/nova/tests/xenapi/stubs.py
+++ b/nova/tests/xenapi/stubs.py
@@ -346,7 +346,7 @@ def stub_out_migration_methods(stubs):
         pass
 
     stubs.Set(vmops.VMOps, '_destroy', fake_destroy)
-    stubs.Set(vmops.VMOps, '_move_disks', fake_move_disks)
+    stubs.Set(vm_utils, 'move_disks', fake_move_disks)
     stubs.Set(vm_utils, 'scan_default_sr', fake_sr)
     stubs.Set(vm_utils, '_scan_sr', fake_sr)
     stubs.Set(vm_utils, 'snapshot_attached_here', fake_snapshot_attached_here)
diff --git a/nova/virt/xenapi/vm_utils.py b/nova/virt/xenapi/vm_utils.py
index e87dbe9b6..c17f53ee8 100644
--- a/nova/virt/xenapi/vm_utils.py
+++ b/nova/virt/xenapi/vm_utils.py
@@ -897,6 +897,14 @@ def _fetch_using_dom0_plugin_with_retry(context, session, image_id,
     raise exception.CouldNotFetchImage(image_id=image_id)
 
 
+def _make_uuid_stack():
+    # NOTE(sirp): The XenAPI plugins run under Python 2.4
+    # which does not have the `uuid` module. To work around this,
+    # we generate the uuids here (under Python 2.6+) and
+    # pass them as arguments
+    return [str(uuid.uuid4()) for i in xrange(MAX_VDI_CHAIN_SIZE)]
+
+
 def _fetch_vhd_image(context, session, instance, image_id):
     """Tell glance to download an image and put the VHDs into the SR
 
@@ -905,13 +913,8 @@
     LOG.debug(_("Asking xapi to fetch vhd image %(image_id)s"), locals(),
               instance=instance)
 
-    # NOTE(sirp): The XenAPI plugins run under Python 2.4
-    # which does not have the `uuid` module. To work around this,
-    # we generate the uuids here (under Python 2.6+) and
-    # pass them as arguments
-    uuid_stack = [str(uuid.uuid4()) for i in xrange(MAX_VDI_CHAIN_SIZE)]
     params = {'image_id': image_id,
-              'uuid_stack': uuid_stack,
+              'uuid_stack': _make_uuid_stack(),
               'sr_path': get_sr_path(session),
               'auth_token': getattr(context, 'auth_token', None)}
 
@@ -2049,3 +2052,26 @@ def ensure_correct_host(session):
             raise
         raise Exception(_('This domU must be running on the host '
                           'specified by xenapi_connection_url'))
+
+
+def move_disks(session, instance, disk_info):
+    """Move and possibly link VHDs via the XAPI plugin."""
+    params = {'instance_uuid': instance['uuid'],
+              'sr_path': get_sr_path(session),
+              'uuid_stack': _make_uuid_stack()}
+
+    result = session.call_plugin(
+        'migration', 'move_vhds_into_sr', {'params': pickle.dumps(params)})
+    imported_vhds = jsonutils.loads(result)
+
+    # Now we rescan the SR so we find the VHDs
+    scan_default_sr(session)
+
+    # Set name-label so we can find if we need to clean up a failed
+    # migration
+    root_uuid = imported_vhds['root']['uuid']
+    set_vdi_name(session, root_uuid, instance.name, 'root')
+
+    root_vdi_ref = session.call_xenapi('VDI.get_by_uuid', root_uuid)
+
+    return {'uuid': root_uuid, 'ref': root_vdi_ref}
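Since `import_vhds` itself is not part of this diff, here is the
uuid_stack idea it relies on: dom0 plugins run under Python 2.4, which
lacks the `uuid` module, so nova pre-generates MAX_VDI_CHAIN_SIZE
fresh uuids per call and the plugin consumes one for each VHD it
installs into the SR. The function below is a sketch of that pattern,
not the plugin's actual code:

    def assign_new_uuids(staged_vhds, uuid_stack):
        # Sketch: one pre-generated uuid per imported VHD. Because the
        # stack is sized to MAX_VDI_CHAIN_SIZE, any chain up to that
        # length can be renamed without dom0 generating uuids itself.
        return dict((vhd_path, uuid_stack.pop()) for vhd_path in staged_vhds)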
diff --git a/nova/virt/xenapi/vmops.py b/nova/virt/xenapi/vmops.py
index 687ec0b65..4805ccaf7 100644
--- a/nova/virt/xenapi/vmops.py
+++ b/nova/virt/xenapi/vmops.py
@@ -21,6 +21,7 @@ Management class for VM-related functions (spawn, reboot, etc).
 
 import cPickle as pickle
 import functools
+import itertools
 import os
 import time
 import uuid
@@ -203,7 +204,7 @@ class VMOps(object):
 
     def finish_migration(self, context, migration, instance, disk_info,
                          network_info, image_meta, resize_instance):
-        root_vdi = self._move_disks(instance, disk_info)
+        root_vdi = vm_utils.move_disks(self._session, instance, disk_info)
 
         if resize_instance:
             self._resize_instance(instance, root_vdi)
@@ -613,12 +614,15 @@ class VMOps(object):
         LOG.debug(_("Finished snapshot and upload for VM"),
                   instance=instance)
 
-    def _migrate_vhd(self, instance, vdi_uuid, dest, sr_path):
+    def _migrate_vhd(self, instance, vdi_uuid, dest, sr_path, seq_num):
+        LOG.debug(_("Migrating VHD '%(vdi_uuid)s' with seq_num %(seq_num)d"),
+                  locals(), instance=instance)
         instance_uuid = instance['uuid']
         params = {'host': dest,
                   'vdi_uuid': vdi_uuid,
                   'instance_uuid': instance_uuid,
-                  'sr_path': sr_path}
+                  'sr_path': sr_path,
+                  'seq_num': seq_num}
 
         try:
             _params = {'params': pickle.dumps(params)}
@@ -648,29 +652,49 @@
                   instance=instance)
         db.instance_update(context, instance['uuid'], {'progress': progress})
 
-    def migrate_disk_and_power_off(self, context, instance, dest,
-                                   instance_type):
-        """Copies a VHD from one host machine to another, possibly
-           resizing filesystem before hand.
+    def _migrate_disk_resizing_down(self, context, instance, dest,
+                                    instance_type, vm_ref, sr_path):
+        # 1. NOOP since we're not transmitting the base-copy separately
+        self._update_instance_progress(context, instance,
+                                       step=1,
+                                       total_steps=RESIZE_TOTAL_STEPS)
 
-        :param instance: the instance that owns the VHD in question.
-        :param dest: the destination host machine.
-        :param disk_type: values are 'primary' or 'cow'.
+        old_gb = instance['root_gb']
+        new_gb = instance_type['root_gb']
+        LOG.debug(_("Resizing down from %(old_gb)dGB to %(new_gb)dGB"),
+                  locals(), instance=instance)
 
-        """
-        # 0. Zero out the progress to begin
+        # 2. Power down the instance before resizing
+        vm_utils.shutdown_vm(
+            self._session, instance, vm_ref, hard=False)
         self._update_instance_progress(context, instance,
-                                       step=0,
+                                       step=2,
                                        total_steps=RESIZE_TOTAL_STEPS)
 
-        vm_ref = self._get_vm_opaque_ref(instance)
+        # 3. Copy VDI, resize partition and filesystem, forget VDI,
+        # truncate VHD
+        vdi_ref, vm_vdi_rec = vm_utils.get_vdi_for_vm_safely(
+            self._session, vm_ref)
+        new_ref, new_uuid = vm_utils.resize_disk(self._session,
+                                                 instance,
+                                                 vdi_ref,
+                                                 instance_type)
+        self._update_instance_progress(context, instance,
+                                       step=3,
+                                       total_steps=RESIZE_TOTAL_STEPS)
 
-        # The primary VDI becomes the COW after the snapshot, and we can
-        # identify it via the VBD. The base copy is the parent_uuid returned
-        # from the snapshot creation
+        # 4. Transfer the new VHD
+        self._migrate_vhd(instance, new_uuid, dest, sr_path, 0)
+        self._update_instance_progress(context, instance,
+                                       step=4,
+                                       total_steps=RESIZE_TOTAL_STEPS)
 
-        base_copy_uuid = cow_uuid = None
+        # Clean up VDI now that it's been copied
+        vm_utils.destroy_vdi(self._session, new_ref)
 
+    def _migrate_disk_resizing_up(self, context, instance, dest, vm_ref,
+                                  sr_path):
         # 1. Create Snapshot
         label = "%s-snapshot" % instance.name
         with vm_utils.snapshot_attached_here(
@@ -679,124 +703,71 @@
                                            step=1,
                                            total_steps=RESIZE_TOTAL_STEPS)
 
-            # FIXME(sirp): this needs to work with VDI chain of arbitrary
-            # length
-            base_copy_uuid = vdi_uuids[1]
-            _vdi_info = vm_utils.get_vdi_for_vm_safely(self._session, vm_ref)
-            vdi_ref, vm_vdi_rec = _vdi_info
-            cow_uuid = vm_vdi_rec['uuid']
-
-            sr_path = vm_utils.get_sr_path(self._session)
-
-            if (instance['auto_disk_config'] and
-                instance['root_gb'] > instance_type['root_gb']):
-                # Resizing disk storage down
-                old_gb = instance['root_gb']
-                new_gb = instance_type['root_gb']
-
-                LOG.debug(_("Resizing down VDI %(cow_uuid)s from "
-                            "%(old_gb)dGB to %(new_gb)dGB"), locals(),
-                          instance=instance)
-
-                # 2. Power down the instance before resizing
-                vm_utils.shutdown_vm(
-                    self._session, instance, vm_ref, hard=False)
+            # 2. Transfer the immutable VHDs (base-copies)
+            #
+            # The first VHD will be the leaf (aka COW) that is being used by
+            # the VM. For this step, we're only interested in the immutable
+            # VHDs which are all of the parents of the leaf VHD.
+            for seq_num, vdi_uuid in itertools.islice(
+                    enumerate(vdi_uuids), 1, None):
+                self._migrate_vhd(instance, vdi_uuid, dest, sr_path, seq_num)
                 self._update_instance_progress(context, instance,
                                                step=2,
                                                total_steps=RESIZE_TOTAL_STEPS)
 
-                # 3. Copy VDI, resize partition and filesystem, forget VDI,
-                # truncate VHD
-                new_ref, new_uuid = vm_utils.resize_disk(self._session,
-                                                         instance,
-                                                         vdi_ref,
-                                                         instance_type)
-                self._update_instance_progress(context, instance,
-                                               step=3,
-                                               total_steps=RESIZE_TOTAL_STEPS)
-
-                # 4. Transfer the new VHD
-                self._migrate_vhd(instance, new_uuid, dest, sr_path)
-                self._update_instance_progress(context, instance,
-                                               step=4,
-                                               total_steps=RESIZE_TOTAL_STEPS)
-
-                # Clean up VDI now that it's been copied
-                vm_utils.destroy_vdi(self._session, new_ref)
-
-                vdis = {'base_copy': new_uuid}
-            else:
-                # Resizing disk storage up, will be handled on destination
-
-                # As an optimization, we transfer the base VDI first,
-                # then shut down the VM, followed by transfering the COW
-                # VDI.
-
-                # 2. Transfer the base copy
-                self._migrate_vhd(instance, base_copy_uuid, dest, sr_path)
-                self._update_instance_progress(context, instance,
-                                               step=2,
-                                               total_steps=RESIZE_TOTAL_STEPS)
-
-                # 3. Now power down the instance
-                vm_utils.shutdown_vm(
-                    self._session, instance, vm_ref, hard=False)
-                self._update_instance_progress(context, instance,
-                                               step=3,
-                                               total_steps=RESIZE_TOTAL_STEPS)
-
-                # 4. Transfer the COW VHD
-                self._migrate_vhd(instance, cow_uuid, dest, sr_path)
-                self._update_instance_progress(context, instance,
-                                               step=4,
-                                               total_steps=RESIZE_TOTAL_STEPS)
-
-            # TODO(mdietz): we could also consider renaming these to
-            # something sensible so we don't need to blindly pass
-            # around dictionaries
-            vdis = {'base_copy': base_copy_uuid, 'cow': cow_uuid}
-
-        # NOTE(sirp): in case we're resizing to the same host (for dev
-        # purposes), apply a suffix to name-label so the two VM records
-        # extant until a confirm_resize don't collide.
-        name_label = self._get_orig_vm_name_label(instance)
-        vm_utils.set_vm_name_label(self._session, vm_ref, name_label)
+        # 3. Now power down the instance
+        vm_utils.shutdown_vm(
+            self._session, instance, vm_ref, hard=False)
+        self._update_instance_progress(context, instance,
+                                       step=3,
+                                       total_steps=RESIZE_TOTAL_STEPS)
 
-        return vdis
+        # 4. Transfer the COW VHD
+        vdi_ref, vm_vdi_rec = vm_utils.get_vdi_for_vm_safely(
+            self._session, vm_ref)
+        cow_uuid = vm_vdi_rec['uuid']
+        self._migrate_vhd(instance, cow_uuid, dest, sr_path, 0)
+        self._update_instance_progress(context, instance,
+                                       step=4,
+                                       total_steps=RESIZE_TOTAL_STEPS)
 
-    def _move_disks(self, instance, disk_info):
-        """Move and possibly link VHDs via the XAPI plugin."""
-        base_copy_uuid = disk_info['base_copy']
-        new_base_copy_uuid = str(uuid.uuid4())
+    def migrate_disk_and_power_off(self, context, instance, dest,
+                                   instance_type):
+        """Copies a VHD from one host machine to another, possibly
+           resizing filesystem before hand.
 
-        params = {'instance_uuid': instance['uuid'],
-                  'sr_path': vm_utils.get_sr_path(self._session),
-                  'old_base_copy_uuid': base_copy_uuid,
-                  'new_base_copy_uuid': new_base_copy_uuid}
+        :param instance: the instance that owns the VHD in question.
+        :param dest: the destination host machine.
+        :param instance_type: instance_type to resize to
+        """
+        vm_ref = self._get_vm_opaque_ref(instance)
+        sr_path = vm_utils.get_sr_path(self._session)
+        resize_down = (instance['auto_disk_config'] and
+                       instance['root_gb'] > instance_type['root_gb'])
 
-        if 'cow' in disk_info:
-            cow_uuid = disk_info['cow']
-            new_cow_uuid = str(uuid.uuid4())
-            params['old_cow_uuid'] = cow_uuid
-            params['new_cow_uuid'] = new_cow_uuid
+        # 0. Zero out the progress to begin
+        self._update_instance_progress(context, instance,
+                                       step=0,
+                                       total_steps=RESIZE_TOTAL_STEPS)
 
-            new_uuid = new_cow_uuid
+        if resize_down:
+            self._migrate_disk_resizing_down(
+                context, instance, dest, instance_type, vm_ref, sr_path)
         else:
-            new_uuid = new_base_copy_uuid
-
-        self._session.call_plugin('migration', 'move_vhds_into_sr',
-                                  {'params': pickle.dumps(params)})
+            self._migrate_disk_resizing_up(
+                context, instance, dest, vm_ref, sr_path)
 
-        # Now we rescan the SR so we find the VHDs
-        vm_utils.scan_default_sr(self._session)
-
-        # Set name-label so we can find if we need to clean up a failed
-        # migration
-        vm_utils.set_vdi_name(self._session, new_uuid, instance.name, 'root')
-
-        new_ref = self._session.call_xenapi('VDI.get_by_uuid', new_uuid)
+        # NOTE(sirp): in case we're resizing to the same host (for dev
+        # purposes), apply a suffix to name-label so the two VM records
+        # extant until a confirm_resize don't collide.
+        name_label = self._get_orig_vm_name_label(instance)
+        vm_utils.set_vm_name_label(self._session, vm_ref, name_label)
 
-        return {'uuid': new_uuid, 'ref': new_ref}
+        # NOTE(sirp): disk_info isn't used by the xenapi driver, instead it
+        # uses a staging-area (/images/instance<uuid>) and sequence-numbered
+        # VHDs to figure out how to reconstruct the VDI chain after syncing
+        disk_info = {}
+        return disk_info
 
     def _resize_instance(self, instance, root_vdi):
         """Resize an instances root disk."""
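A detail of _migrate_disk_resizing_up that is easy to misread:
itertools.islice(enumerate(vdi_uuids), 1, None) pairs each VHD with
its sequence number and then skips entry 0, so only the immutable
parents are transferred while the VM is still running; the leaf is
sent after shutdown with seq_num 0. A quick demonstration with made-up
uuids:

    import itertools

    vdi_uuids = ['leaf-cow-uuid', 'parent-uuid', 'base-copy-uuid']
    transfers = list(itertools.islice(enumerate(vdi_uuids), 1, None))
    assert transfers == [(1, 'parent-uuid'), (2, 'base-copy-uuid')]
    # The leaf (index 0) is transferred last, post-shutdown, as 0.vhd.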
diff --git a/plugins/xenserver/xenapi/etc/xapi.d/plugins/glance b/plugins/xenserver/xenapi/etc/xapi.d/plugins/glance
index 6fbd14714..12616be40 100755
--- a/plugins/xenserver/xenapi/etc/xapi.d/plugins/glance
+++ b/plugins/xenserver/xenapi/etc/xapi.d/plugins/glance
@@ -231,7 +231,7 @@ def upload_vhd(session, args):
 
     staging_path = utils.make_staging_area(sr_path)
     try:
-        utils.prepare_staging_area_for_upload(sr_path, staging_path, vdi_uuids)
+        utils.prepare_staging_area(sr_path, staging_path, vdi_uuids)
         _upload_tarball(staging_path, image_id, glance_host, glance_port,
                         auth_token, properties)
     finally:
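The rename is the point of this hunk: the helper is no longer
upload-specific, so snapshot upload and migration now share one
staging routine. Upload stages a whole chain from the default
seq_num=0, while the migration plugin (below) stages one VHD at its
chain offset. Illustrative calls, reusing names from the surrounding
plugin code with placeholder values:

    # sr_path, staging_path, vdi_uuids, vdi_uuid come from the plugin
    # context; values here are placeholders for illustration.

    # Snapshot upload: whole chain becomes 0.vhd, 1.vhd, ... in staging.
    utils.prepare_staging_area(sr_path, staging_path, vdi_uuids)

    # Migration: a single VHD staged at its position in the chain, e.g.
    # the leaf's grandparent lands in the staging area as 2.vhd.
    utils.prepare_staging_area(sr_path, staging_path, [vdi_uuid], seq_num=2)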
diff --git a/plugins/xenserver/xenapi/etc/xapi.d/plugins/migration b/plugins/xenserver/xenapi/etc/xapi.d/plugins/migration
index 42f8eaef8..85ad0bdbc 100755
--- a/plugins/xenserver/xenapi/etc/xapi.d/plugins/migration
+++ b/plugins/xenserver/xenapi/etc/xapi.d/plugins/migration
@@ -20,6 +20,10 @@ XenAPI Plugin for transfering data between host nodes
 """
 
 import cPickle as pickle
+try:
+    import json
+except ImportError:
+    import simplejson as json
 import os
 import os.path
 import shlex
@@ -28,71 +32,37 @@ import subprocess
 
 import XenAPIPlugin
 
+import utils
+
 from pluginlib_nova import *
 configure_logging('migration')
 
 
-def move_file(item, src, dst):
-    """Move file with logging."""
-    #NOTE(markwash): shutil.move can be less efficient than it should be if
-    # dst is a directory. See http://bugs.python.org/issue1577.
-    if os.path.isdir(dst):
-        dst = os.path.join(dst, os.path.basename(src))
-    logging.debug('Moving %(item)s: %(src)s -> %(dst)s' % locals())
-    shutil.move(src, dst)
-
-
 def move_vhds_into_sr(session, args):
     """Moves the VHDs from their copied location to the SR"""
     params = pickle.loads(exists(args, 'params'))
     instance_uuid = params['instance_uuid']
     sr_path = params['sr_path']
-    sr_temp_path = "%s/tmp" % sr_path
-    temp_vhd_path = "%s/instance%s" % (sr_temp_path, instance_uuid)
-
-    logging.debug('Creating temporary SR path %s' % temp_vhd_path)
-    os.makedirs(temp_vhd_path)
-
-    # Discover the copied VHDs locally, and then set up paths to copy
-    # them to under the SR
-    source_image_path = "/images/instance%s" % instance_uuid
-
-    old_base_copy_uuid = params['old_base_copy_uuid']
-    new_base_copy_uuid = params['new_base_copy_uuid']
-    source_base_copy_path = "%s/%s.vhd" % (source_image_path,
-                                           old_base_copy_uuid)
-    new_base_copy_path = "%s/%s.vhd" % (temp_vhd_path, new_base_copy_uuid)
-
-    move_file('base', source_base_copy_path, new_base_copy_path)
-
-    if 'old_cow_uuid' in params:
-        old_cow_uuid = params['old_cow_uuid']
-        new_cow_uuid = params['new_cow_uuid']
+    uuid_stack = params['uuid_stack']
 
-        source_cow_path = "%s/%s.vhd" % (source_image_path, old_cow_uuid)
-        new_cow_path = "%s/%s.vhd" % (temp_vhd_path, new_cow_uuid)
+    staging_path = "/images/instance%s" % instance_uuid
+    imported_vhds = utils.import_vhds(sr_path, staging_path, uuid_stack)
+    utils.cleanup_staging_area(staging_path)
+    return json.dumps(imported_vhds)
 
-        move_file('COW', source_cow_path, new_cow_path)
 
-        # Link the COW to the base copy
-        logging.debug('Attaching COW to the base %s -> %s' %
-                      (new_cow_path, new_base_copy_path))
-        subprocess.call(['/usr/sbin/vhd-util', 'modify',
-                         '-n', new_cow_path, '-p', new_base_copy_path])
-
-        # NOTE(sirp): COW should be copied before base_copy to avoid
-        # snapwatchd GC'ing an unreferenced base copy VDI
-        move_file('COW', new_cow_path, sr_path)
+def _rsync_vhds(instance_uuid, host, staging_path, user="root"):
+    ssh_cmd = '\"ssh -o StrictHostKeyChecking=no\"'
 
-    move_file('base', new_base_copy_path, sr_path)
+    if not staging_path.endswith('/'):
+        staging_path += '/'
 
-    logging.debug('Cleaning up source path %s' % source_image_path)
-    os.rmdir(source_image_path)
+    dest_path = '%s@%s:/images/instance%s/' % (user, host, instance_uuid)
 
-    logging.debug('Cleaning up temporary SR path %s' % temp_vhd_path)
-    os.rmdir(temp_vhd_path)
-    return ""
+    rsync_cmd = "nohup /usr/bin/rsync -av -e %(ssh_cmd)s %(staging_path)s"\
+                " %(dest_path)s" % locals()
+    rsync_proc = utils.make_subprocess(rsync_cmd, stdout=True, stderr=True)
+    utils.finish_subprocess(rsync_proc, rsync_cmd)
 
 
 def transfer_vhd(session, args):
@@ -102,32 +72,19 @@
     host = params['host']
     vdi_uuid = params['vdi_uuid']
     sr_path = params['sr_path']
-    vhd_path = "%s.vhd" % vdi_uuid
-
-    source_path = "%s/%s" % (sr_path, vhd_path)
-    dest_user = 'root'
-    dest_path = '%s@%s:/images/instance%s/' % (dest_user, host, instance_uuid)
-
-    logging.debug("Preparing to transmit %s to %s" % (source_path,
-                                                      dest_path))
-
-    ssh_cmd = '\"ssh -o StrictHostKeyChecking=no\"'
-
-    # NOTE(dprince): shlex python 2.4 doesn't like unicode so we
-    # explicitly convert to ascii
-    rsync_args = shlex.split(('nohup /usr/bin/rsync -av -e %s %s %s'
-                              % (ssh_cmd, source_path, dest_path)).encode('ascii'))
+    seq_num = params['seq_num']
 
-    logging.debug('rsync %s' % (' '.join(rsync_args, )))
+    staging_path = utils.make_staging_area(sr_path)
+    try:
+        utils.prepare_staging_area(
+            sr_path, staging_path, [vdi_uuid], seq_num=seq_num)
+        _rsync_vhds(instance_uuid, host, staging_path)
+    finally:
+        utils.cleanup_staging_area(staging_path)
 
-    rsync_proc = subprocess.Popen(rsync_args, stdout=subprocess.PIPE)
-    logging.debug('Rsync output: \n %s' % rsync_proc.communicate()[0])
-    logging.debug('Rsync return: %d' % rsync_proc.returncode)
-    if rsync_proc.returncode != 0:
-        raise Exception("Unexpected VHD transfer failure")
     return ""
 
 
 if __name__ == '__main__':
     XenAPIPlugin.dispatch({'transfer_vhd': transfer_vhd,
-                           'move_vhds_into_sr': move_vhds_into_sr, })
+                           'move_vhds_into_sr': move_vhds_into_sr})
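To make the transfer concrete, expanding the format string from
_rsync_vhds with made-up values gives the command below. Note the
trailing-slash normalization, which tells rsync to copy the staging
directory's contents rather than the directory itself:

    user = 'root'
    host = '10.0.0.2'                            # made-up destination
    instance_uuid = 'abc123'                     # made-up instance uuid
    staging_path = '/var/run/sr-mount/tmpXYZ/'   # made-up staging dir
    ssh_cmd = '\"ssh -o StrictHostKeyChecking=no\"'
    dest_path = '%s@%s:/images/instance%s/' % (user, host, instance_uuid)
    rsync_cmd = "nohup /usr/bin/rsync -av -e %(ssh_cmd)s %(staging_path)s"\
                " %(dest_path)s" % locals()
    # rsync_cmd is now:
    # nohup /usr/bin/rsync -av -e "ssh -o StrictHostKeyChecking=no"
    #     /var/run/sr-mount/tmpXYZ/ root@10.0.0.2:/images/instanceabc123/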
diff --git a/plugins/xenserver/xenapi/etc/xapi.d/plugins/utils.py b/plugins/xenserver/xenapi/etc/xapi.d/plugins/utils.py
index 874d66fb5..20401b968 100644
--- a/plugins/xenserver/xenapi/etc/xapi.d/plugins/utils.py
+++ b/plugins/xenserver/xenapi/etc/xapi.d/plugins/utils.py
@@ -28,6 +28,9 @@ CHUNK_SIZE = 8192
 
 def make_subprocess(cmdline, stdout=False, stderr=False, stdin=False):
     """Make a subprocess according to the given command-line string
     """
+    # NOTE(dprince): shlex python 2.4 doesn't like unicode so we
+    # explicitly convert to ascii
+    cmdline = cmdline.encode('ascii')
     logging.info("Running cmd '%s'" % cmdline)
     kwargs = {}
     kwargs['stdout'] = stdout and subprocess.PIPE or None
@@ -248,9 +251,8 @@ def import_vhds(sr_path, staging_path, uuid_stack):
     return imported_vhds
 
 
-def prepare_staging_area_for_upload(sr_path, staging_path, vdi_uuids):
+def prepare_staging_area(sr_path, staging_path, vdi_uuids, seq_num=0):
     """Hard-link VHDs into staging area."""
-    seq_num = 0
     for vdi_uuid in vdi_uuids:
         source = os.path.join(sr_path, "%s.vhd" % vdi_uuid)
         link_name = os.path.join(staging_path, "%d.vhd" % seq_num)
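The hunk above ends mid-loop, so for completeness here is a sketch of
the whole helper as it presumably reads with the change applied; the
os.link call and the seq_num increment are assumed from context rather
than shown in the diff:

    import os

    def prepare_staging_area(sr_path, staging_path, vdi_uuids, seq_num=0):
        """Hard-link VHDs into staging area."""
        for vdi_uuid in vdi_uuids:
            source = os.path.join(sr_path, "%s.vhd" % vdi_uuid)
            link_name = os.path.join(staging_path, "%d.vhd" % seq_num)
            os.link(source, link_name)  # hard link: no data is copied
            seq_num += 1  # assumed: advance to the next chain position

Letting the caller choose the starting offset is the small change that
lets the migration plugin stage any position in a chain, which is what
lifts the old two-VHD (base copy plus COW) limit on migrations.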