From abb764f51385a0b811b23379d78f7db027d4cca5 Mon Sep 17 00:00:00 2001 From: Josh Kearney Date: Wed, 23 Mar 2011 14:41:35 -0500 Subject: Automatically unrescue instances after a given timeout --- nova/compute/manager.py | 12 +++++- nova/utils.py | 7 ++++ nova/virt/libvirt_conn.py | 4 ++ nova/virt/xenapi/vmops.py | 95 +++++++++++++++++++++++++++++++++++------------ nova/virt/xenapi_conn.py | 4 ++ 5 files changed, 96 insertions(+), 26 deletions(-) diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 576937cd8..3834c33ab 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -65,8 +65,11 @@ flags.DEFINE_string('console_host', socket.gethostname(), 'Console proxy host to use to connect to instances on' 'this host.') flags.DEFINE_integer('live_migration_retry_count', 30, - ("Retry count needed in live_migration." - " sleep 1 sec for each count")) + "Retry count needed in live_migration." + " sleep 1 sec for each count") +flags.DEFINE_integer("rescue_timeout", 0, + "Automatically unrescue an instance after N hours." + " Set to 0 to disable.") LOG = logging.getLogger('nova.compute.manager') @@ -132,6 +135,11 @@ class ComputeManager(manager.Manager): """ self.driver.init_host(host=self.host) + def periodic_tasks(self, context=None): + """Tasks to be run at a periodic interval.""" + super(ComputeManager, self).periodic_tasks(context) + self.driver.poll_rescued_instances(FLAGS.rescue_timeout) + def _update_state(self, context, instance_id): """Update the state of an instance from the driver info.""" # FIXME(ja): include other fields from state? diff --git a/nova/utils.py b/nova/utils.py index 499af2039..38cdb8021 100644 --- a/nova/utils.py +++ b/nova/utils.py @@ -334,6 +334,13 @@ def utcnow(): utcnow.override_time = None +def is_then_greater(then, seconds): + if utcnow() - then > datetime.timedelta(seconds=seconds): + return True + else: + return False + + def utcnow_ts(): """Timestamp version of our utcnow function.""" return time.mktime(utcnow().timetuple()) diff --git a/nova/virt/libvirt_conn.py b/nova/virt/libvirt_conn.py index e80b9fbdf..07545382d 100644 --- a/nova/virt/libvirt_conn.py +++ b/nova/virt/libvirt_conn.py @@ -412,6 +412,10 @@ class LibvirtConnection(object): # the normal xml file, we can just call reboot here self.reboot(instance) + @exception.wrap_exception + def poll_rescued_instances(self, timeout): + pass + @exception.wrap_exception def spawn(self, instance): xml = self.to_xml(instance) diff --git a/nova/virt/xenapi/vmops.py b/nova/virt/xenapi/vmops.py index 61ff00903..f46ac3b7e 100644 --- a/nova/virt/xenapi/vmops.py +++ b/nova/virt/xenapi/vmops.py @@ -51,6 +51,7 @@ class VMOps(object): def __init__(self, session): self.XenAPI = session.get_imported_xenapi() self._session = session + self.poll_rescue_last_ran = None VMHelper.XenAPI = self.XenAPI @@ -462,6 +463,10 @@ class VMOps(object): except self.XenAPI.Failure, exc: LOG.exception(exc) + def _shutdown_rescue(self, vm_ref): + """Shutdown a rescue instance""" + self._session.call_xenapi("Async.VM.hard_shutdown", rescue_vm_ref) + def _destroy_vdis(self, instance, vm_ref): """Destroys all VDIs associated with a VM""" instance_id = instance.id @@ -479,6 +484,24 @@ class VMOps(object): except self.XenAPI.Failure, exc: LOG.exception(exc) + def _destroy_rescue_vdis(self, rescue_vm_ref): + """Destroys all VDIs associated with a rescued VM""" + vdi_refs = VMHelper.lookup_vm_vdis(self._session, rescue_vm_ref) + for vdi_ref in vdi_refs: + try: + self._session.call_xenapi("Async.VDI.destroy", vdi_ref) + except self.XenAPI.Failure: + continue + + def _destroy_rescue_vbds(self, rescue_vm_ref): + """Destroys all VBDs tied to a rescue VM""" + vbd_refs = self._session.get_xenapi().VM.get_VBDs(rescue_vm_ref) + for vbd_ref in vbd_refs: + _vbd_ref = self._session.get_xenapi().VBD.get_record(vbd_ref) + if _vbd_ref["userdevice"] == "1": + VMHelper.unplug_vbd(self._session, vbd_ref) + VMHelper.destroy_vbd(self._session, vbd_ref) + def _destroy_kernel_ramdisk(self, instance, vm_ref): """ Three situations can occur: @@ -529,6 +552,10 @@ class VMOps(object): LOG.debug(_("Instance %(instance_id)s VM destroyed") % locals()) + def _destroy_rescue(self, vm_ref): + """Destroy a rescue instance""" + self._session.call_xenapi("Async.VM.destroy", rescue_vm_ref) + def destroy(self, instance): """ Destroy VM instance @@ -632,41 +659,61 @@ class VMOps(object): """ rescue_vm_ref = VMHelper.lookup(self._session, - instance.name + "-rescue") + instance.name + "-rescue") if not rescue_vm_ref: raise exception.NotFound(_( "Instance is not in Rescue Mode: %s" % instance.name)) original_vm_ref = self._get_vm_opaque_ref(instance) - vbd_refs = self._session.get_xenapi().VM.get_VBDs(rescue_vm_ref) - instance._rescue = False - for vbd_ref in vbd_refs: - _vbd_ref = self._session.get_xenapi().VBD.get_record(vbd_ref) - if _vbd_ref["userdevice"] == "1": - VMHelper.unplug_vbd(self._session, vbd_ref) - VMHelper.destroy_vbd(self._session, vbd_ref) - - task1 = self._session.call_xenapi("Async.VM.hard_shutdown", - rescue_vm_ref) - self._session.wait_for_task(task1, instance.id) - - vdi_refs = VMHelper.lookup_vm_vdis(self._session, rescue_vm_ref) - for vdi_ref in vdi_refs: - try: - task = self._session.call_xenapi('Async.VDI.destroy', vdi_ref) - self._session.wait_for_task(task, instance.id) - except self.XenAPI.Failure: - continue - - task2 = self._session.call_xenapi('Async.VM.destroy', rescue_vm_ref) - self._session.wait_for_task(task2, instance.id) - + self._destroy_rescue_vbds(rescue_vm_ref) + self._shutdown_rescue(rescue_vm_ref) + self._destroy_rescue_vdis(rescue_vm_ref) + self._destroy_rescue(rescue_vm_ref) self._release_bootlock(original_vm_ref) self._start(instance, original_vm_ref) + def poll_rescued_instances(self, timeout): + """Look for expirable rescued instances + - forcibly exit rescue mode for any instances that have been + in rescue mode for >= the provided timeout + """ + last_ran = self.poll_rescue_last_ran + if last_ran: + if not utils.is_then_greater(last_ran, timeout * 60 * 60): + # Do not run. Let's bail. + return + else: + # Update the time tracker and proceed. + self.poll_rescue_last_ran = utils.utcnow() + else: + # We need a base time to start tracking. + self.poll_rescue_last_ran = utils.utcnow() + return + + vms = [] + for instance in self.list_instances(): + if instance.endswith("-rescue"): + vms.append(dict(name=instance, + vm_ref=VMHelper.lookup(self._session, + instance))) + + for vm in vms: + rescue_name = vm["name"] + rescue_vm_ref = vm["vm_ref"] + original_name = vm["name"].split("-rescue", 1)[0] + original_vm_ref = VMHelper.lookup(self._session, original_name) + + self._destroy_rescue_vbds(rescue_vm_ref) + self._shutdown_rescue(rescue_vm_ref) + self._destroy_rescue_vdis(rescue_vm_ref) + self._destroy_rescue(rescue_vm_ref) + self._release_bootlock(original_vm_ref) + self._session.call_xenapi("VM.start", original_vm_ref, False, + False) + def get_info(self, instance): """Return data about VM instance""" vm_ref = self._get_vm_opaque_ref(instance) diff --git a/nova/virt/xenapi_conn.py b/nova/virt/xenapi_conn.py index da42a83b6..50aad96b8 100644 --- a/nova/virt/xenapi_conn.py +++ b/nova/virt/xenapi_conn.py @@ -225,6 +225,10 @@ class XenAPIConnection(object): """Unrescue the specified instance""" self._vmops.unrescue(instance, callback) + def poll_rescued_instances(self, timeout): + """Poll for rescued instances""" + self._vmops.poll_rescued_instances(timeout) + def reset_network(self, instance): """reset networking for specified instance""" self._vmops.reset_network(instance) -- cgit From a291e68fef876080d7984a1d7192e939808596bf Mon Sep 17 00:00:00 2001 From: Josh Kearney Date: Wed, 23 Mar 2011 14:55:33 -0500 Subject: Fixed some typos --- nova/virt/xenapi/vmops.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nova/virt/xenapi/vmops.py b/nova/virt/xenapi/vmops.py index cb36730a0..0a516bd36 100644 --- a/nova/virt/xenapi/vmops.py +++ b/nova/virt/xenapi/vmops.py @@ -463,7 +463,7 @@ class VMOps(object): except self.XenAPI.Failure, exc: LOG.exception(exc) - def _shutdown_rescue(self, vm_ref): + def _shutdown_rescue(self, rescue_vm_ref): """Shutdown a rescue instance""" self._session.call_xenapi("Async.VM.hard_shutdown", rescue_vm_ref) @@ -552,7 +552,7 @@ class VMOps(object): LOG.debug(_("Instance %(instance_id)s VM destroyed") % locals()) - def _destroy_rescue(self, vm_ref): + def _destroy_rescue_instance(self, rescue_vm_ref): """Destroy a rescue instance""" self._session.call_xenapi("Async.VM.destroy", rescue_vm_ref) @@ -671,7 +671,7 @@ class VMOps(object): self._destroy_rescue_vbds(rescue_vm_ref) self._shutdown_rescue(rescue_vm_ref) self._destroy_rescue_vdis(rescue_vm_ref) - self._destroy_rescue(rescue_vm_ref) + self._destroy_rescue_instance(rescue_vm_ref) self._release_bootlock(original_vm_ref) self._start(instance, original_vm_ref) @@ -709,7 +709,7 @@ class VMOps(object): self._destroy_rescue_vbds(rescue_vm_ref) self._shutdown_rescue(rescue_vm_ref) self._destroy_rescue_vdis(rescue_vm_ref) - self._destroy_rescue(rescue_vm_ref) + self._destroy_rescue_instance(rescue_vm_ref) self._release_bootlock(original_vm_ref) self._session.call_xenapi("VM.start", original_vm_ref, False, False) -- cgit From 83e519b734078d8214fa0dc1d518607c7c0b244a Mon Sep 17 00:00:00 2001 From: Josh Kearney Date: Wed, 23 Mar 2011 15:21:18 -0500 Subject: Only run periodic task when rescue_timeout is greater than 0 --- nova/compute/manager.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 3834c33ab..9cb210c77 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -138,7 +138,8 @@ class ComputeManager(manager.Manager): def periodic_tasks(self, context=None): """Tasks to be run at a periodic interval.""" super(ComputeManager, self).periodic_tasks(context) - self.driver.poll_rescued_instances(FLAGS.rescue_timeout) + if FLAGS.rescue_timeout > 0: + self.driver.poll_rescued_instances(FLAGS.rescue_timeout) def _update_state(self, context, instance_id): """Update the state of an instance from the driver info.""" -- cgit From b3a8c70304672abe9b461c6cfeed3e8b517ca0b6 Mon Sep 17 00:00:00 2001 From: Josh Kearney Date: Wed, 23 Mar 2011 16:56:54 -0500 Subject: Added docstring --- nova/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nova/utils.py b/nova/utils.py index 38cdb8021..bf1aa4a91 100644 --- a/nova/utils.py +++ b/nova/utils.py @@ -335,6 +335,7 @@ utcnow.override_time = None def is_then_greater(then, seconds): + """Return True of 'then' is greater than 'seconds'""" if utcnow() - then > datetime.timedelta(seconds=seconds): return True else: -- cgit From a12b6f0a0808fba5541723a537118447b55b69ad Mon Sep 17 00:00:00 2001 From: Josh Kearney Date: Wed, 23 Mar 2011 17:15:41 -0500 Subject: Better method name --- nova/utils.py | 6 +++--- nova/virt/xenapi/vmops.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/nova/utils.py b/nova/utils.py index bf1aa4a91..04b6c9778 100644 --- a/nova/utils.py +++ b/nova/utils.py @@ -334,9 +334,9 @@ def utcnow(): utcnow.override_time = None -def is_then_greater(then, seconds): - """Return True of 'then' is greater than 'seconds'""" - if utcnow() - then > datetime.timedelta(seconds=seconds): +def is_older_than(before, seconds): + """Return True if before is older than 'seconds'""" + if utcnow() - before > datetime.timedelta(seconds=seconds): return True else: return False diff --git a/nova/virt/xenapi/vmops.py b/nova/virt/xenapi/vmops.py index 0a516bd36..3f1eceddc 100644 --- a/nova/virt/xenapi/vmops.py +++ b/nova/virt/xenapi/vmops.py @@ -682,7 +682,7 @@ class VMOps(object): """ last_ran = self.poll_rescue_last_ran if last_ran: - if not utils.is_then_greater(last_ran, timeout * 60 * 60): + if not utils.is_older_than(last_ran, timeout * 60 * 60): # Do not run. Let's bail. return else: -- cgit From e19b12f668fb6cd693df6834f8895fb5487953d7 Mon Sep 17 00:00:00 2001 From: Josh Kearney Date: Wed, 23 Mar 2011 18:34:47 -0500 Subject: Review feedback --- nova/compute/manager.py | 2 +- nova/virt/xenapi/vmops.py | 30 +++++++++++++++--------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 9cb210c77..ce1ae87e3 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -68,7 +68,7 @@ flags.DEFINE_integer('live_migration_retry_count', 30, "Retry count needed in live_migration." " sleep 1 sec for each count") flags.DEFINE_integer("rescue_timeout", 0, - "Automatically unrescue an instance after N hours." + "Automatically unrescue an instance after N seconds." " Set to 0 to disable.") LOG = logging.getLogger('nova.compute.manager') diff --git a/nova/virt/xenapi/vmops.py b/nova/virt/xenapi/vmops.py index 3f1eceddc..1f2e10aa6 100644 --- a/nova/virt/xenapi/vmops.py +++ b/nova/virt/xenapi/vmops.py @@ -497,8 +497,8 @@ class VMOps(object): """Destroys all VBDs tied to a rescue VM""" vbd_refs = self._session.get_xenapi().VM.get_VBDs(rescue_vm_ref) for vbd_ref in vbd_refs: - _vbd_ref = self._session.get_xenapi().VBD.get_record(vbd_ref) - if _vbd_ref["userdevice"] == "1": + vbd_rec = self._session.get_xenapi().VBD.get_record(vbd_ref) + if vbd_rec["userdevice"] == "1": # primary VBD is always 1 VMHelper.unplug_vbd(self._session, vbd_ref) VMHelper.destroy_vbd(self._session, vbd_ref) @@ -554,6 +554,10 @@ class VMOps(object): def _destroy_rescue_instance(self, rescue_vm_ref): """Destroy a rescue instance""" + self._destroy_rescue_vbds(rescue_vm_ref) + self._shutdown_rescue(rescue_vm_ref) + self._destroy_rescue_vdis(rescue_vm_ref) + self._session.call_xenapi("Async.VM.destroy", rescue_vm_ref) def destroy(self, instance): @@ -668,9 +672,6 @@ class VMOps(object): original_vm_ref = self._get_vm_opaque_ref(instance) instance._rescue = False - self._destroy_rescue_vbds(rescue_vm_ref) - self._shutdown_rescue(rescue_vm_ref) - self._destroy_rescue_vdis(rescue_vm_ref) self._destroy_rescue_instance(rescue_vm_ref) self._release_bootlock(original_vm_ref) self._start(instance, original_vm_ref) @@ -682,7 +683,7 @@ class VMOps(object): """ last_ran = self.poll_rescue_last_ran if last_ran: - if not utils.is_older_than(last_ran, timeout * 60 * 60): + if not utils.is_older_than(last_ran, timeout): # Do not run. Let's bail. return else: @@ -693,23 +694,22 @@ class VMOps(object): self.poll_rescue_last_ran = utils.utcnow() return - vms = [] + rescue_vms = [] for instance in self.list_instances(): if instance.endswith("-rescue"): - vms.append(dict(name=instance, - vm_ref=VMHelper.lookup(self._session, - instance))) + rescue_vms.append(dict(name=instance, + vm_ref=VMHelper.lookup(self._session, + instance))) - for vm in vms: + for vm in rescue_vms: rescue_name = vm["name"] rescue_vm_ref = vm["vm_ref"] + + self._destroy_rescue_instance(rescue_vm_ref) + original_name = vm["name"].split("-rescue", 1)[0] original_vm_ref = VMHelper.lookup(self._session, original_name) - self._destroy_rescue_vbds(rescue_vm_ref) - self._shutdown_rescue(rescue_vm_ref) - self._destroy_rescue_vdis(rescue_vm_ref) - self._destroy_rescue_instance(rescue_vm_ref) self._release_bootlock(original_vm_ref) self._session.call_xenapi("VM.start", original_vm_ref, False, False) -- cgit From 10e61af8a23c126c15fcfcf25156d32facf19ec2 Mon Sep 17 00:00:00 2001 From: Josh Kearney Date: Wed, 23 Mar 2011 22:55:04 -0500 Subject: Added hyperv stub --- nova/virt/hyperv.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nova/virt/hyperv.py b/nova/virt/hyperv.py index 29d18dac5..75fed6d4f 100644 --- a/nova/virt/hyperv.py +++ b/nova/virt/hyperv.py @@ -467,3 +467,6 @@ class HyperVConnection(object): if vm is None: raise exception.NotFound('Cannot detach volume from missing %s ' % instance_name) + + def poll_rescued_instances(self, timeout): + pass -- cgit