 nova/tests/test_xenapi.py                          |  33 ++++
 nova/tests/xenapi/stubs.py                         |  27 +++
 nova/virt/xenapi/vm_utils.py                       | 146 ++++++-----
 nova/virt/xenapi/vmops.py                          | 124 ++++++----
 nova/virt/xenapi_conn.py                           |   9 +-
 plugins/xenserver/xenapi/etc/xapi.d/plugins/glance |   5 +-
 6 files changed, 255 insertions(+), 89 deletions(-)
diff --git a/nova/tests/test_xenapi.py b/nova/tests/test_xenapi.py
index 66a973a78..4651040d2 100644
--- a/nova/tests/test_xenapi.py
+++ b/nova/tests/test_xenapi.py
@@ -302,6 +302,19 @@ class XenAPIVMTestCase(test.TestCase):
         self.assertEquals(self.vm['HVM_boot_params'], {})
         self.assertEquals(self.vm['HVM_boot_policy'], '')
 
+    def _check_no_unbound_vdi(self):
+        url = FLAGS.xenapi_connection_url
+        username = FLAGS.xenapi_connection_username
+        password = FLAGS.xenapi_connection_password
+        session = xenapi_conn.XenAPISession(url, username, password)
+        vdi_refs = session.call_xenapi('VDI.get_all')
+        for vdi_ref in vdi_refs:
+            vdi_rec = session.call_xenapi('VDI.get_record', vdi_ref)
+            if 'VBDs' in vdi_rec:
+                self.assertEquals(vdi_rec['VBDs'], {})
+            else:
+                self.fail('Found unexpected unbound VDI: %s' % vdi_rec['uuid'])
+
     def _test_spawn(self, image_id, kernel_id, ramdisk_id,
                     instance_type="m1.large", os_type="linux"):
         stubs.stubout_session(self.stubs, stubs.FakeSessionForVMTests)
@@ -328,6 +341,26 @@ class XenAPIVMTestCase(test.TestCase):
                           self._test_spawn, 1, 2, 3, "m1.xlarge")
 
+    def test_spawn_fail_cleanup_1(self):
+        """Simulates an error while downloading an image.
+        Verifies that the VDIs created are properly cleaned up."""
+        FLAGS.xenapi_image_service = 'glance'
+        stubs.stubout_fetch_image_glance_disk(self.stubs)
+        self.assertRaises(xenapi_fake.Failure,
+                          self._test_spawn, 1, 2, 3)
+        # ensure there is no VDI without a VBD
+        self._check_no_unbound_vdi()
+
+    def test_spawn_fail_cleanup_2(self):
+        """Simulates an error while creating a VM record.
+        Verifies that the VDIs created are properly cleaned up."""
+        FLAGS.xenapi_image_service = 'glance'
+        stubs.stubout_create_vm(self.stubs)
+        self.assertRaises(xenapi_fake.Failure,
+                          self._test_spawn, 1, 2, 3)
+        # ensure there is no VDI without a VBD
+        self._check_no_unbound_vdi()
+
     def test_spawn_raw_objectstore(self):
         FLAGS.xenapi_image_service = 'objectstore'
         self._test_spawn(1, None, None)
diff --git a/nova/tests/xenapi/stubs.py b/nova/tests/xenapi/stubs.py
index 7f9706a3d..fb2955fa5 100644
--- a/nova/tests/xenapi/stubs.py
+++ b/nova/tests/xenapi/stubs.py
@@ -137,6 +137,33 @@ def stubout_is_vdi_pv(stubs):
     stubs.Set(vm_utils, '_is_vdi_pv', f)
 
 
+def stubout_lookup_image(stubs):
+    """Simulates a failure in lookup_image"""
+    def f(_1, _2, _3, _4):
+        raise Exception("Test Exception raised by fake lookup_image")
+    stubs.Set(vm_utils, 'lookup_image', f)
+
+
+def stubout_fetch_image_glance_disk(stubs):
+    """Simulates a failure in _fetch_image_glance_disk"""
+
+    @classmethod
+    def f(cls, *args):
+        raise fake.Failure("Test Exception raised by " +
+                           "fake _fetch_image_glance_disk")
+    stubs.Set(vm_utils.VMHelper, '_fetch_image_glance_disk', f)
+
+
+def stubout_create_vm(stubs):
+    """Simulates a failure in create_vm"""
+
+    @classmethod
+    def f(cls, *args):
+        raise fake.Failure("Test Exception raised by " +
+                           "fake create_vm")
+    stubs.Set(vm_utils.VMHelper, 'create_vm', f)
+
+
class FakeSessionForVMTests(fake.SessionBase):
    """ Stubs out a XenAPISession for VM tests """
    def __init__(self, uri):
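
The two cleanup tests above share one pattern: stub a single stage of spawn so
that it raises, assert that spawn fails, then verify that no orphaned VDI
survives. The sketch below illustrates that fail-then-verify shape in
isolation; `provision`, `ok_step` and `fail_step` are hypothetical stand-ins,
not Nova APIs.

    class FakeFailure(Exception):
        pass

    def provision(steps):
        created = []                     # resources acquired so far
        try:
            for step in steps:
                created.append(step())   # each step returns a resource id
        except Exception:
            # undo the partial work, newest resource first
            for resource in reversed(created):
                print('cleaning up %s' % resource)
            raise                        # re-raise once clean-up is done

    def ok_step():
        return 'vdi-1'

    def fail_step():
        raise FakeFailure('simulated image download error')

    try:
        provision([ok_step, fail_step])
    except FakeFailure:
        pass    # expected: the failure still propagates after clean-up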
""" import os +import sys import pickle import re import time @@ -63,17 +64,19 @@ KERNEL_DIR = '/boot/guest' class ImageType: """ Enumeration class for distinguishing different image types - 0 - kernel/ramdisk image (goes on dom0's filesystem) - 1 - disk image (local SR, partitioned by objectstore plugin) - 2 - raw disk image (local SR, NOT partitioned by plugin) - 3 - vhd disk image (local SR, NOT inspected by XS, PV assumed for + 0 - kernel image (goes on dom0's filesystem) + 1 - ramdisk image (goes on dom0's filesystem) + 2 - disk image (local SR, partitioned by objectstore plugin) + 3 - raw disk image (local SR, NOT partitioned by plugin) + 4 - vhd disk image (local SR, NOT inspected by XS, PV assumed for linux, HVM assumed for Windows) """ - KERNEL_RAMDISK = 0 - DISK = 1 - DISK_RAW = 2 - DISK_VHD = 3 + KERNEL = 0 + RAMDISK = 1 + DISK = 2 + DISK_RAW = 3 + DISK_VHD = 4 class VMHelper(HelperBase): @@ -137,7 +140,6 @@ class VMHelper(HelperBase): 'VCPUs_max': vcpus, 'VCPUs_params': {}, 'xenstore_data': {}} - # Complete VM configuration record according to the image type # non-raw/raw with PV kernel/raw in HVM mode if use_pv_kernel: @@ -233,6 +235,15 @@ class VMHelper(HelperBase): raise StorageError(_('Unable to destroy VBD %s') % vbd_ref) @classmethod + def destroy_vdi(cls, session, vdi_ref): + try: + task = session.call_xenapi('Async.VDI.destroy', vdi_ref) + session.wait_for_task(task) + except cls.XenAPI.Failure, exc: + LOG.exception(exc) + raise StorageError(_('Unable to destroy VDI %s') % vdi_ref) + + @classmethod def create_vif(cls, session, vm_ref, network_ref, mac_address, dev, rxtx_cap=0): """Create a VIF record. Returns a Deferred that gives the new @@ -388,12 +399,12 @@ class VMHelper(HelperBase): image_type): LOG.debug(_("Asking xapi to fetch vhd image %(image)s") % locals()) - sr_ref = safe_find_sr(session) - # NOTE(sirp): The Glance plugin runs under Python 2.4 which does not - # have the `uuid` module. To work around this, we generate the uuids - # here (under Python 2.6+) and pass them as arguments + # NOTE(sirp): The Glance plugin runs under Python 2.4 + # which does not have the `uuid` module. 
To work around this, + # we generate the uuids here (under Python 2.6+) and + # pass them as arguments uuid_stack = [str(uuid.uuid4()) for i in xrange(2)] params = {'image_id': image, @@ -405,17 +416,26 @@ class VMHelper(HelperBase): kwargs = {'params': pickle.dumps(params)} task = session.async_call_plugin('glance', 'download_vhd', kwargs) vdi_uuid = session.wait_for_task(task, instance_id) + #from this point we have a VDI on Xen host + #if anything goes wrong, we need to remember its uuid + try: - cls.scan_sr(session, instance_id, sr_ref) + cls.scan_sr(session, instance_id, sr_ref) - # Set the name-label to ease debugging - vdi_ref = session.get_xenapi().VDI.get_by_uuid(vdi_uuid) - name_label = get_name_label_for_image(image) - session.get_xenapi().VDI.set_name_label(vdi_ref, name_label) + # Set the name-label to ease debugging + vdi_ref = session.get_xenapi().VDI.get_by_uuid(vdi_uuid) + name_label = get_name_label_for_image(image) + session.get_xenapi().VDI.set_name_label(vdi_ref, name_label) - LOG.debug(_("xapi 'download_vhd' returned VDI UUID %(vdi_uuid)s") - % locals()) - return vdi_uuid + LOG.debug(_("xapi 'download_vhd' returned VDI UUID %(vdi_uuid)s") + % locals()) + return vdi_uuid + except cls.XenAPI.Failure as e: + #Looking for XenAPI failures only + LOG.exception(_("instance %s: Failed to fetch glance image"), + instance_id, exc_info=sys.exc_info()) + e.args = e.args + ({image_type: (vdi_uuid,)},) + raise e @classmethod def _fetch_image_glance_disk(cls, session, instance_id, image, access, @@ -431,42 +451,55 @@ class VMHelper(HelperBase): # FIXME(sirp): Since the Glance plugin seems to be required for the # VHD disk, it may be worth using the plugin for both VHD and RAW and # DISK restores + LOG.debug(_("Fetching image %(image)s") % locals()) sr_ref = safe_find_sr(session) client = glance.client.Client(FLAGS.glance_host, FLAGS.glance_port) meta, image_file = client.get_image(image) virtual_size = int(meta['size']) vdi_size = virtual_size - LOG.debug(_("Size for image %(image)s:%(virtual_size)d") % locals()) + LOG.debug(_("Size for image %(image)s:" + + "%(virtual_size)d") % locals()) if image_type == ImageType.DISK: # Make room for MBR. 
vdi_size += MBR_SIZE_BYTES name_label = get_name_label_for_image(image) - vdi_ref = cls.create_vdi(session, sr_ref, name_label, vdi_size, False) - - with_vdi_attached_here(session, vdi_ref, False, - lambda dev: - _stream_disk(dev, image_type, - virtual_size, image_file)) - if image_type == ImageType.KERNEL_RAMDISK: - #we need to invoke a plugin for copying VDI's - #content into proper path - LOG.debug(_("Copying VDI %s to /boot/guest on dom0"), vdi_ref) - fn = "copy_kernel_vdi" - args = {} - args['vdi-ref'] = vdi_ref - #let the plugin copy the correct number of bytes - args['image-size'] = str(vdi_size) - task = session.async_call_plugin('glance', fn, args) - filename = session.wait_for_task(task, instance_id) - #remove the VDI as it is not needed anymore - session.get_xenapi().VDI.destroy(vdi_ref) - LOG.debug(_("Kernel/Ramdisk VDI %s destroyed"), vdi_ref) - return filename - else: - return session.get_xenapi().VDI.get_uuid(vdi_ref) + vdi_ref = cls.create_vdi(session, sr_ref, name_label, + vdi_size, False) + #from this point we have a VDI on Xen host + #if anything goes wrong, we need to remember its uuid + try: + filename = None + vdi_uuid = session.get_xenapi().VDI.get_uuid(vdi_ref) + with_vdi_attached_here(session, vdi_ref, False, + lambda dev: + _stream_disk(dev, image_type, + virtual_size, image_file)) + if image_type in (ImageType.KERNEL, ImageType.RAMDISK): + #we need to invoke a plugin for copying VDI's + #content into proper path + LOG.debug(_("Copying VDI %s to /boot/guest on dom0"), vdi_ref) + fn = "copy_kernel_vdi" + args = {} + args['vdi-ref'] = vdi_ref + #let the plugin copy the correct number of bytes + args['image-size'] = str(vdi_size) + task = session.async_call_plugin('glance', fn, args) + filename = session.wait_for_task(task, instance_id) + #remove the VDI as it is not needed anymore + session.get_xenapi().VDI.destroy(vdi_ref) + LOG.debug(_("Kernel/Ramdisk VDI %s destroyed"), vdi_ref) + return filename + else: + return vdi_uuid + except (cls.XenAPI.Failure, IOError, OSError) as e: + #Looking for XenAPI and OS failures + LOG.exception(_("instance %s: Failed to fetch glance image"), + instance_id, exc_info=sys.exc_info()) + e.args = e.args + ({image_type: (vdi_uuid, filename)},) + raise e @classmethod def determine_disk_image_type(cls, instance): @@ -481,7 +514,8 @@ class VMHelper(HelperBase): whether a kernel_id is specified. 
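
Both fetch paths now follow the same convention on failure: a dictionary
mapping the image type to the partially-created resources (VDI uuid, and
optionally a dom0 filename) is appended to the exception's args, so the caller
can find out what needs destroying. A minimal sketch of that convention, with
illustrative names (`DISK`, `fetch_image`) rather than Nova's real helpers:

    DISK = 2    # stand-in for ImageType.DISK

    def fetch_image():
        vdi_uuid = 'fake-uuid'      # pretend a VDI was created on the host
        try:
            raise RuntimeError('simulated download failure')
        except RuntimeError as e:
            # piggy-back {image_type: (vdi_uuid,)} on the exception
            e.args = e.args + ({DISK: (vdi_uuid,)},)
            raise

    try:
        fetch_image()
    except RuntimeError as e:
        last_arg = e.args[-1]
        resources = last_arg if isinstance(last_arg, dict) else {}
        for image_type, items in resources.items():
            print('would destroy VDI %s' % items[0])

The cost of this convention is that every consumer must treat the last element
of args as possibly being this dictionary, which is why the vmops.py changes
below check isinstance(last_arg, dict) before reusing it.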
""" def log_disk_format(image_type): - pretty_format = {ImageType.KERNEL_RAMDISK: 'KERNEL_RAMDISK', + pretty_format = {ImageType.KERNEL: 'KERNEL', + ImageType.RAMDISK: 'RAMDISK', ImageType.DISK: 'DISK', ImageType.DISK_RAW: 'DISK_RAW', ImageType.DISK_VHD: 'DISK_VHD'} @@ -492,12 +526,12 @@ class VMHelper(HelperBase): "%(image_id)s, instance %(instance_id)s") % locals()) def determine_from_glance(): - glance_disk_format2nova_type = { - 'ami': ImageType.DISK, - 'aki': ImageType.KERNEL_RAMDISK, - 'ari': ImageType.KERNEL_RAMDISK, - 'raw': ImageType.DISK_RAW, - 'vhd': ImageType.DISK_VHD} + glance_disk_format2nova_type = {'ami': ImageType.DISK, + 'raw': ImageType.DISK_RAW, + 'vhd': ImageType.DISK_VHD, + 'aki': ImageType.KERNEL, + 'ari': ImageType.RAMDISK} + client = glance.client.Client(FLAGS.glance_host, FLAGS.glance_port) meta = client.get_image_meta(instance.image_id) disk_format = meta['disk_format'] @@ -539,7 +573,7 @@ class VMHelper(HelperBase): secret, image_type): url = images.image_url(image) LOG.debug(_("Asking xapi to fetch %(url)s as %(access)s") % locals()) - if image_type == ImageType.KERNEL_RAMDISK: + if image_type in (ImageType.KERNEL, ImageType.RAMDISK): fn = 'get_kernel' else: fn = 'get_vdi' @@ -549,7 +583,7 @@ class VMHelper(HelperBase): args['password'] = secret args['add_partition'] = 'false' args['raw'] = 'false' - if image_type != ImageType.KERNEL_RAMDISK: + if not image_type in (ImageType.KERNEL, ImageType.RAMDISK): args['add_partition'] = 'true' if image_type == ImageType.DISK_RAW: args['raw'] = 'true' @@ -991,8 +1025,8 @@ def _write_partition(virtual_size, dev): def execute(*cmd, **kwargs): return utils.execute(*cmd, **kwargs) - execute('parted', '--script', dest, 'mklabel', 'msdos') - execute('parted', '--script', dest, 'mkpart', 'primary', + execute('sudo', 'parted', '--script', dest, 'mklabel', 'msdos') + execute('sudo', 'parted', '--script', dest, 'mkpart', 'primary', '%ds' % primary_first, '%ds' % primary_last) diff --git a/nova/virt/xenapi/vmops.py b/nova/virt/xenapi/vmops.py index af39a3def..c2c173b6f 100644 --- a/nova/virt/xenapi/vmops.py +++ b/nova/virt/xenapi/vmops.py @@ -27,6 +27,7 @@ import pickle import subprocess import tempfile import uuid +import sys from nova import db from nova import context @@ -108,14 +109,26 @@ class VMOps(object): user = AuthManager().get_user(instance.user_id) project = AuthManager().get_project(instance.project_id) disk_image_type = VMHelper.determine_disk_image_type(instance) + #if fetch image fails the exception will be handled in spawn vdi_uuid = VMHelper.fetch_image(self._session, instance.id, instance.image_id, user, project, disk_image_type) - return vdi_uuid + return (vdi_uuid, disk_image_type) def spawn(self, instance, network_info=None): - vdi_uuid = self._create_disk(instance) - vm_ref = self._create_vm(instance, vdi_uuid, network_info) - self._spawn(instance, vm_ref) + try: + vdi_uuid = disk_image_type = None + (vdi_uuid, disk_image_type) = self._create_disk(instance) + vm_ref = self._create_vm(instance, vdi_uuid, network_info) + self._spawn(instance, vm_ref) + except (self.XenAPI.Failure, OSError, IOError) as spawn_error: + + LOG.exception(_("instance %s: Failed to spawn"), + instance.id, exc_info=sys.exc_info()) + LOG.debug(_('Instance %s failed to spawn - performing clean-up'), + instance.id) + self._handle_spawn_error(vdi_uuid, disk_image_type, spawn_error) + #re-throw the error + raise spawn_error def _create_vm(self, instance, vdi_uuid, network_info=None): """Create VM instance""" @@ -142,20 +155,43 @@ class 
VMOps(object): disk_image_type = VMHelper.determine_disk_image_type(instance) - kernel = None - if instance.kernel_id: - kernel = VMHelper.fetch_image(self._session, instance.id, - instance.kernel_id, user, project, ImageType.KERNEL_RAMDISK) - - ramdisk = None - if instance.ramdisk_id: - ramdisk = VMHelper.fetch_image(self._session, instance.id, - instance.ramdisk_id, user, project, ImageType.KERNEL_RAMDISK) - - use_pv_kernel = VMHelper.determine_is_pv(self._session, instance.id, - vdi_ref, disk_image_type, instance.os_type) - vm_ref = VMHelper.create_vm(self._session, instance, kernel, ramdisk, - use_pv_kernel) + try: + kernel = None + ramdisk = None + + if instance.kernel_id: + kernel = VMHelper.fetch_image(self._session, + instance.id, instance.kernel_id, user, project, + ImageType.KERNEL) + + if instance.ramdisk_id: + ramdisk = VMHelper.fetch_image(self._session, + instance.id, instance.ramdisk_id, user, project, + ImageType.RAMDISK) + use_pv_kernel = VMHelper.determine_is_pv(self._session, + instance.id, vdi_ref, disk_image_type, instance.os_type) + vm_ref = VMHelper.create_vm(self._session, instance, + kernel, ramdisk, use_pv_kernel) + except (self.XenAPI.Failure, OSError, IOError) as vm_create_error: + #collect resources to clean up + #_handle_spawn_error will remove unused resources + LOG.exception(_("instance %s: Failed to spawn - " + + "Unable to create VM"), + instance.id, exc_info=sys.exc_info()) + last_arg = None + resources = {} + + if len(vm_create_error.args) > 0: + last_arg = vm_create_error.args[len(vm_create_error.args) - 1] + if isinstance(last_arg, dict): + resources = last_arg + else: + vm_create_error.args = vm_create_error.args + (resources,) + if ImageType.KERNEL not in resources and kernel: + resources[ImageType.KERNEL] = (None, kernel) + if ImageType.RAMDISK not in resources and ramdisk: + resources[ImageType.RAMDISK] = (None, ramdisk) + raise vm_create_error VMHelper.create_vbd(session=self._session, vm_ref=vm_ref, vdi_ref=vdi_ref, userdevice=0, bootable=True) @@ -224,6 +260,40 @@ class VMOps(object): return timer.start(interval=0.5, now=True) + def _handle_spawn_error(self, vdi_uuid, disk_image_type, spawn_error): + resources = {} + files_to_remove = {} + #extract resource dictionary from spawn error + if len(spawn_error.args) > 0: + last_arg = spawn_error.args[len(spawn_error.args) - 1] + resources = last_arg + if vdi_uuid: + resources[disk_image_type] = (vdi_uuid,) + LOG.debug(_("Resources to remove:%s"), resources) + + for vdi_type in resources: + items_to_remove = resources[vdi_type] + vdi_to_remove = items_to_remove[0] + if vdi_to_remove != None: + try: + vdi_ref = self._session.call_xenapi('VDI.get_by_uuid', + vdi_to_remove) + LOG.debug(_('Removing VDI %(vdi_ref)s' + + '(uuid:%(vdi_to_remove)s)'), locals()) + VMHelper.destroy_vdi(self._session, vdi_ref) + except self.XenAPI.Failure: + #vdi already deleted + LOG.debug(_("Skipping VDI destroy for %s"), vdi_to_remove) + if len(items_to_remove) > 1: + # there is also a file to remove + files_to_remove[vdi_type] = items_to_remove[1] + + if len(files_to_remove) > 0: + LOG.debug(_("Removing kernel/ramdisk files from dom0")) + self._destroy_kernel_ramdisk_plugin_call( + files_to_remove.get(ImageType.KERNEL, None), + files_to_remove.get(ImageType.RAMDISK, None)) + def _get_vm_opaque_ref(self, instance_or_vm): """Refactored out the common code of many methods that receive either a vm name or a vm instance, and want a vm instance in return. 
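
For orientation, the resources dictionary that _handle_spawn_error walks maps
an ImageType value to a tuple whose first element is a VDI uuid (or None) and
whose optional second element is a dom0 file to delete. A purely illustrative
example of its shape after a late spawn failure (the uuid and paths below are
made up):

    resources = {
        ImageType.DISK_VHD: ('00000000-0000-0000-0000-000000000001',),
        ImageType.KERNEL: (None, '/boot/guest/vmlinuz.fake'),
        ImageType.RAMDISK: (None, '/boot/guest/initrd.fake'),
    }

VDI entries are destroyed through VMHelper.destroy_vdi, while the filename
entries are batched into a single remove_kernel_ramdisk plugin call.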
diff --git a/nova/virt/xenapi_conn.py b/nova/virt/xenapi_conn.py
index f20fb29d8..c7e94c508 100644
--- a/nova/virt/xenapi_conn.py
+++ b/nova/virt/xenapi_conn.py
@@ -367,11 +367,10 @@ class XenAPISession(object):
         try:
             name = self._session.xenapi.task.get_name_label(task)
             status = self._session.xenapi.task.get_status(task)
-            if id:
-                action = dict(
-                    instance_id=int(id),
-                    action=name[0:255],  # Ensure action is never > 255
-                    error=None)
+            action = dict(
+                action=name[0:255],  # Ensure action is never > 255
+                instance_id=id and int(id) or None,
+                error=None)
             if status == "pending":
                 return
             elif status == "success":
diff --git a/plugins/xenserver/xenapi/etc/xapi.d/plugins/glance b/plugins/xenserver/xenapi/etc/xapi.d/plugins/glance
index 0a45f3873..b12175845 100644
--- a/plugins/xenserver/xenapi/etc/xapi.d/plugins/glance
+++ b/plugins/xenserver/xenapi/etc/xapi.d/plugins/glance
@@ -366,15 +366,14 @@ def copy_kernel_vdi(session, args):
 
 def remove_kernel_ramdisk(session, args):
     """Removes kernel and/or ramdisk from dom0's file system"""
-    kernel_file = exists(args, 'kernel-file')
-    ramdisk_file = exists(args, 'ramdisk-file')
+    kernel_file = optional(args, 'kernel-file')
+    ramdisk_file = optional(args, 'ramdisk-file')
     if kernel_file:
         os.remove(kernel_file)
     if ramdisk_file:
         os.remove(ramdisk_file)
     return "ok"
 
-
 if __name__ == '__main__':
     XenAPIPlugin.dispatch({'upload_vhd': upload_vhd,
                            'download_vhd': download_vhd,
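
The plugin change above makes both arguments to remove_kernel_ramdisk
optional, matching the new _destroy_kernel_ramdisk_plugin_call, which may pass
only one of them during clean-up. The sketch below shows the contract implied
by the exists/optional swap; the real helpers live in the XenServer plugin
utilities, so treat this as an assumption about their behaviour.

    class ArgumentError(Exception):
        pass

    def exists(args, key):
        """Old behaviour: the argument is mandatory."""
        if key not in args:
            raise ArgumentError('argument %s is required' % key)
        return args[key]

    def optional(args, key):
        """New behaviour: return None when the argument was not passed."""
        return args.get(key)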
