From 78ebce99803522ceb98754f2765386f39858b627 Mon Sep 17 00:00:00 2001 From: Chris Behrens Date: Tue, 30 Oct 2012 18:33:18 +0000 Subject: Clean up xenapi VM records on failed disk attaches Breaks the create_vm_step up so that we can clean up the VM record in case of disk attach failures. Fixes bug 1073219 Change-Id: I54e28e2cce6fe4f29a03566eff998479410f7555 --- nova/tests/test_xenapi.py | 42 +++++++++++-- nova/tests/xenapi/stubs.py | 11 +++- nova/virt/xenapi/fake.py | 1 + nova/virt/xenapi/vm_utils.py | 3 +- nova/virt/xenapi/vmops.py | 138 +++++++++++++++++++++++++------------------ 5 files changed, 127 insertions(+), 68 deletions(-) diff --git a/nova/tests/test_xenapi.py b/nova/tests/test_xenapi.py index 11e8844c4..224b7585f 100644 --- a/nova/tests/test_xenapi.py +++ b/nova/tests/test_xenapi.py @@ -516,6 +516,13 @@ class XenAPIVMTestCase(stubs.XenAPITestBase): session = xenapi_conn.XenAPISession(url, username, password) return session.call_xenapi('VDI.get_all') + def _list_vms(self): + url = FLAGS.xenapi_connection_url + username = FLAGS.xenapi_connection_username + password = FLAGS.xenapi_connection_password + session = xenapi_conn.XenAPISession(url, username, password) + return session.call_xenapi('VM.get_all') + def _check_vdis(self, start_list, end_list): for vdi_ref in end_list: if not vdi_ref in start_list: @@ -588,30 +595,53 @@ class XenAPIVMTestCase(stubs.XenAPITestBase): def test_spawn_fail_cleanup_1(self): """Simulates an error while downloading an image. - Verifies that VDIs created are properly cleaned up. - + Verifies that the VM and VDIs created are properly cleaned up. """ vdi_recs_start = self._list_vdis() + start_vms = self._list_vms() stubs.stubout_fetch_disk_image(self.stubs, raise_failure=True) self.assertRaises(xenapi_fake.Failure, self._test_spawn, 1, 2, 3) # No additional VDI should be found. vdi_recs_end = self._list_vdis() + end_vms = self._list_vms() self._check_vdis(vdi_recs_start, vdi_recs_end) + # No additional VMs should be found. + self.assertEqual(start_vms, end_vms) def test_spawn_fail_cleanup_2(self): """Simulates an error while creating VM record. - It verifies that VDIs created are properly cleaned up. - + Verifies that the VM and VDIs created are properly cleaned up. """ vdi_recs_start = self._list_vdis() + start_vms = self._list_vms() stubs.stubout_create_vm(self.stubs) self.assertRaises(xenapi_fake.Failure, self._test_spawn, 1, 2, 3) # No additional VDI should be found. vdi_recs_end = self._list_vdis() + end_vms = self._list_vms() + self._check_vdis(vdi_recs_start, vdi_recs_end) + # No additional VMs should be found. + self.assertEqual(start_vms, end_vms) + + def test_spawn_fail_cleanup_3(self): + """Simulates an error while attaching disks. + + Verifies that the VM and VDIs created are properly cleaned up. + """ + stubs.stubout_attach_disks(self.stubs) + vdi_recs_start = self._list_vdis() + start_vms = self._list_vms() + self.assertRaises(xenapi_fake.Failure, + self._test_spawn, 1, 2, 3) + # No additional VDI should be found. + vdi_recs_end = self._list_vdis() + end_vms = self._list_vms() self._check_vdis(vdi_recs_start, vdi_recs_end) + # No additional VMs should be found. + self.assertEqual(start_vms, end_vms) @stub_vm_utils_with_vdi_attached_here def test_spawn_raw_glance(self): @@ -1362,7 +1392,7 @@ class XenAPIAutoDiskConfigTestCase(stubs.XenAPITestBase): vdis = {'root': {'uuid': vdi_uuid, 'ref': vdi_ref}} self.conn._vmops._attach_disks(instance, vm_ref, instance['name'], - disk_image_type, vdis) + vdis, disk_image_type) self.assertEqual(marker["partition_called"], called) @@ -1448,7 +1478,7 @@ class XenAPIGenerateLocal(stubs.XenAPITestBase): self.called = False self.conn._vmops._attach_disks(instance, vm_ref, instance['name'], - disk_image_type, vdis) + vdis, disk_image_type) self.assertTrue(self.called) def test_generate_swap(self): diff --git a/nova/tests/xenapi/stubs.py b/nova/tests/xenapi/stubs.py index b3b9a8964..560e12d70 100644 --- a/nova/tests/xenapi/stubs.py +++ b/nova/tests/xenapi/stubs.py @@ -138,11 +138,18 @@ def stubout_create_vm(stubs): """Simulates a failure in create_vm.""" def f(*args): - raise fake.Failure("Test Exception raised by " + - "fake create_vm") + raise fake.Failure("Test Exception raised by fake create_vm") stubs.Set(vm_utils, 'create_vm', f) +def stubout_attach_disks(stubs): + """Simulates a failure in _attach_disks.""" + + def f(*args): + raise fake.Failure("Test Exception raised by fake _attach_disks") + stubs.Set(vmops.VMOps, '_attach_disks', f) + + def _make_fake_vdi(): sr_ref = fake.get_all('SR')[0] vdi_ref = fake.create_vdi('', sr_ref) diff --git a/nova/virt/xenapi/fake.py b/nova/virt/xenapi/fake.py index c66f7bf09..b22112f66 100644 --- a/nova/virt/xenapi/fake.py +++ b/nova/virt/xenapi/fake.py @@ -221,6 +221,7 @@ def after_VM_create(vm_ref, vm_rec): vm_rec.setdefault('memory_dynamic_max', str(8 * 1024 * 1024 * 1024)) vm_rec.setdefault('VCPUs_max', str(4)) vm_rec.setdefault('VBDs', []) + vm_rec.setdefault('resident_on', '') def create_pbd(config, host_ref, sr_ref, attached): diff --git a/nova/virt/xenapi/vm_utils.py b/nova/virt/xenapi/vm_utils.py index b08dd1387..a4c492029 100644 --- a/nova/virt/xenapi/vm_utils.py +++ b/nova/virt/xenapi/vm_utils.py @@ -862,7 +862,8 @@ def destroy_kernel_ramdisk(session, kernel, ramdisk): args['kernel-file'] = kernel if ramdisk: args['ramdisk-file'] = ramdisk - session.call_plugin('kernel', 'remove_kernel_ramdisk', args) + if args: + session.call_plugin('kernel', 'remove_kernel_ramdisk', args) def _create_cached_image(context, session, instance, name_label, diff --git a/nova/virt/xenapi/vmops.py b/nova/virt/xenapi/vmops.py index ac00be6d7..1f8cafa3b 100644 --- a/nova/virt/xenapi/vmops.py +++ b/nova/virt/xenapi/vmops.py @@ -211,9 +211,10 @@ class VMOps(object): vm_utils.ImageType.RAMDISK) ramdisk_file = vdis['ramdisk'].get('file') + disk_image_type = vm_utils.determine_disk_image_type(image_meta) vm_ref = self._create_vm(context, instance, instance['name'], {'root': root_vdi}, - network_info, image_meta, kernel_file, + disk_image_type, network_info, kernel_file, ramdisk_file) # 5. Start VM self._start(instance, vm_ref=vm_ref) @@ -229,9 +230,8 @@ class VMOps(object): self._session.get_xenapi_host(), False, False) - def _create_disks(self, context, instance, name_label, image_meta, + def _create_disks(self, context, instance, name_label, disk_image_type, block_device_info=None): - disk_image_type = vm_utils.determine_disk_image_type(image_meta) vdis = vm_utils.get_vdis_for_instance(context, self._session, instance, name_label, instance['image_ref'], @@ -257,19 +257,13 @@ class VMOps(object): step = make_step_decorator(context, instance) @step - def vanity_step(undo_mgr): - # NOTE(sirp): _create_disk will potentially take a *very* long - # time to complete since it has to fetch the image over the - # network and images can be several gigs in size. To avoid - # progress remaining at 0% for too long, which will appear to be - # an error, we insert a "vanity" step to bump the progress up one - # notch above 0. - pass + def determine_disk_image_type_step(undo_mgr): + return vm_utils.determine_disk_image_type(image_meta) @step - def create_disks_step(undo_mgr): + def create_disks_step(undo_mgr, disk_image_type): vdis = self._create_disks(context, instance, name_label, - image_meta, block_device_info) + disk_image_type, block_device_info) def undo_create_disks(): vdi_refs = [vdi['ref'] for vdi in vdis.values() @@ -307,12 +301,10 @@ class VMOps(object): return kernel_file, ramdisk_file @step - def create_vm_step(undo_mgr, vdis, kernel_file, ramdisk_file): - vm_ref = self._create_vm(context, instance, name_label, vdis, - network_info, image_meta, - kernel_file=kernel_file, - ramdisk_file=ramdisk_file, - rescue=rescue) + def create_vm_record_step(undo_mgr, vdis, disk_image_type, + kernel_file, ramdisk_file): + vm_ref = self._create_vm_record(context, instance, name_label, + vdis, disk_image_type, kernel_file, ramdisk_file) def undo_create_vm(): self._destroy(instance, vm_ref, network_info) @@ -320,6 +312,11 @@ class VMOps(object): undo_mgr.undo_with(undo_create_vm) return vm_ref + @step + def attach_disks_step(undo_mgr, vm_ref, vdis, disk_image_type): + self._attach_disks(instance, vm_ref, name_label, vdis, + disk_image_type) + if rescue: # NOTE(johannes): Attach root disk to rescue VM now, before # booting the VM, since we can't hotplug block devices @@ -332,6 +329,15 @@ class VMOps(object): vm_utils.create_vbd(self._session, vm_ref, vdi_ref, DEVICE_RESCUE, bootable=False) + @step + def setup_network_step(undo_mgr, vm_ref, vdis): + self._setup_vm_networking(instance, vm_ref, vdis, network_info, + rescue) + + @step + def inject_metadata_step(undo_mgr, vm_ref): + self.inject_instance_metadata(instance, vm_ref) + @step def prepare_security_group_filters_step(undo_mgr): try: @@ -365,13 +371,21 @@ class VMOps(object): undo_mgr = utils.UndoManager() try: + # NOTE(sirp): The create_disks() step will potentially take a + # *very* long time to complete since it has to fetch the image + # over the network and images can be several gigs in size. To + # avoid progress remaining at 0% for too long, make sure the + # first step is something that completes rather quickly. bdev_set_default_root(undo_mgr) - vanity_step(undo_mgr) + disk_image_type = determine_disk_image_type_step(undo_mgr) - vdis = create_disks_step(undo_mgr) + vdis = create_disks_step(undo_mgr, disk_image_type) kernel_file, ramdisk_file = create_kernel_ramdisk_step(undo_mgr) - - vm_ref = create_vm_step(undo_mgr, vdis, kernel_file, ramdisk_file) + vm_ref = create_vm_record_step(undo_mgr, vdis, disk_image_type, + kernel_file, ramdisk_file) + attach_disks_step(undo_mgr, vm_ref, vdis, disk_image_type) + setup_network_step(undo_mgr, vm_ref, vdis) + inject_metadata_step(undo_mgr, vm_ref) prepare_security_group_filters_step(undo_mgr) if rescue: @@ -384,10 +398,42 @@ class VMOps(object): msg = _("Failed to spawn, rolling back") undo_mgr.rollback_and_reraise(msg=msg, instance=instance) - def _create_vm(self, context, instance, name_label, vdis, network_info, - image_meta, kernel_file=None, ramdisk_file=None, - rescue=False): + def _create_vm(self, context, instance, name_label, vdis, + disk_image_type, network_info, kernel_file=None, + ramdisk_file=None, rescue=False): """Create VM instance.""" + vm_ref = self._create_vm_record(context, instance, name_label, + vdis, disk_image_type, kernel_file, ramdisk_file) + self._attach_disks(instance, vm_ref, name_label, vdis, + disk_image_type) + self._setup_vm_networking(instance, vm_ref, vdis, network_info, + rescue) + self.inject_instance_metadata(instance, vm_ref) + return vm_ref + + def _setup_vm_networking(self, instance, vm_ref, vdis, network_info, + rescue): + # Alter the image before VM start for network injection. + if FLAGS.flat_injected: + vm_utils.preconfigure_instance(self._session, instance, + vdis['root']['ref'], network_info) + + self._create_vifs(vm_ref, instance, network_info) + self.inject_network_info(instance, network_info, vm_ref) + + hostname = instance['hostname'] + if rescue: + hostname = 'RESCUE-%s' % hostname + self.inject_hostname(instance, vm_ref, hostname) + + def _create_vm_record(self, context, instance, name_label, vdis, + disk_image_type, kernel_file, ramdisk_file): + """Create the VM record in Xen, making sure that we do not create + a duplicate name-label. Also do a rough sanity check on memory + to try to short-circuit a potential failure later. (The memory + check only accounts for running VMs, so it can miss other builds + that are in progress.) + """ vm_ref = vm_utils.lookup(self._session, name_label) if vm_ref is not None: raise exception.InstanceExists(name=name_label) @@ -396,8 +442,6 @@ class VMOps(object): if not vm_utils.ensure_free_mem(self._session, instance): raise exception.InsufficientFreeMemory(uuid=instance['uuid']) - disk_image_type = vm_utils.determine_disk_image_type(image_meta) - mode = vm_mode.get_from_instance(instance) if mode == vm_mode.XEN: use_pv_kernel = True @@ -410,36 +454,14 @@ class VMOps(object): if instance['vm_mode'] != mode: # Update database with normalized (or determined) value - db.instance_update(nova_context.get_admin_context(), - instance['uuid'], {'vm_mode': mode}) + db.instance_update(context, instance['uuid'], {'vm_mode': mode}) vm_ref = vm_utils.create_vm(self._session, instance, name_label, kernel_file, ramdisk_file, use_pv_kernel) - - # Add disks to VM - self._attach_disks(instance, vm_ref, name_label, disk_image_type, - vdis) - - # Alter the image before VM start for network injection. - if FLAGS.flat_injected: - vm_utils.preconfigure_instance(self._session, instance, - vdis['root']['ref'], network_info) - - self._create_vifs(vm_ref, instance, network_info) - self.inject_network_info(instance, network_info, vm_ref) - - hostname = instance['hostname'] - if rescue: - hostname = 'RESCUE-%s' % hostname - - self.inject_hostname(instance, vm_ref, hostname) - - self.inject_instance_metadata(instance, vm_ref) - return vm_ref - def _attach_disks(self, instance, vm_ref, name_label, disk_image_type, - vdis): + def _attach_disks(self, instance, vm_ref, name_label, vdis, + disk_image_type): ctx = nova_context.get_admin_context() instance_type = db.instance_type_get(ctx, instance['instance_type_id']) @@ -938,9 +960,7 @@ class VMOps(object): def _destroy_vdis(self, instance, vm_ref, block_device_info=None): """Destroys all VDIs associated with a VM.""" - instance_uuid = instance['uuid'] - LOG.debug(_("Destroying VDIs for Instance %(instance_uuid)s") - % locals()) + LOG.debug(_("Destroying VDIs"), instance=instance) vdi_refs = vm_utils.lookup_vm_vdis(self._session, vm_ref) if not vdi_refs: @@ -979,9 +999,9 @@ class VMOps(object): # 3. We have both kernel and ramdisk (kernel, ramdisk) = vm_utils.lookup_kernel_ramdisk(self._session, vm_ref) - - vm_utils.destroy_kernel_ramdisk(self._session, kernel, ramdisk) - LOG.debug(_("kernel/ramdisk files removed"), instance=instance) + if kernel or ramdisk: + vm_utils.destroy_kernel_ramdisk(self._session, kernel, ramdisk) + LOG.debug(_("kernel/ramdisk files removed"), instance=instance) def _destroy_rescue_instance(self, rescue_vm_ref, original_vm_ref): """Destroy a rescue instance.""" -- cgit