summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jenkins <jenkins@review.openstack.org> 2012-01-03 19:00:14 +0000
committer Gerrit Code Review <review@openstack.org> 2012-01-03 19:00:14 +0000
commit 8e217208e0ccc90ab2eb876d1662817f9346f9a2 (patch)
tree 8263601c474a954343b6bb6f36c0b83f89ba16ff
parent 3feee8a5184370f3e0d835d5d11a6ae62f722c24 (diff)
parent e695b8498c486d5b664d8e551e7182a102826cd2 (diff)
Merge "Adds running_deleted_instance_reaper task."
-rw-r--r-- nova/compute/manager.py 73
-rw-r--r-- nova/utils.py 28
2 files changed, 101 insertions, 0 deletions
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index ab582114b..44d35b4a6 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -85,6 +85,17 @@ flags.DEFINE_integer("resize_confirm_window", 0,
" Set to 0 to disable.")
flags.DEFINE_integer('host_state_interval', 120,
'Interval in seconds for querying the host status')
# How long an instance must have been deleted before the cleanup task
# will act on it.
flags.DEFINE_integer(
    "running_deleted_instance_timeout",
    0,
    "Number of seconds after being deleted when a still-running instance"
    " should be considered eligible for cleanup.")

# Cadence of the cleanup task, expressed in periodic-task ticks.
flags.DEFINE_integer(
    "running_deleted_instance_poll_interval",
    30,
    "Number of periodic scheduler ticks to wait between runs of the"
    " cleanup task.")

# What the cleanup task does with a deleted instance that is still present
# on the host; the default of 'noop' leaves the task disabled.
flags.DEFINE_string(
    "running_deleted_instance_action",
    "noop",
    "Action to take if a running deleted instance is detected. Valid"
    " options are 'noop', 'log', and 'reap'. Set to 'noop' to disable.")
LOG = logging.getLogger('nova.compute.manager')
@@ -2046,3 +2057,65 @@ class ComputeManager(manager.SchedulerDependentManager):
'details': fault.message,
}
self.db.instance_fault_create(context, values)
+
@manager.periodic_task(
    ticks_between_runs=FLAGS.running_deleted_instance_poll_interval)
def _cleanup_running_deleted_instances(self, context):
    """Cleanup any instances which are erroneously still running after
    having been deleted.

    The behaviour is selected by FLAGS.running_deleted_instance_action:

    1. noop - do nothing
    2. log  - log which instances are erroneously running
    3. reap - shutdown and cleanup any erroneously running instances

    The use-case for this cleanup task is: for various reasons, it may be
    possible for the database to show an instance as deleted but for that
    instance to still be running on a host machine (see bug
    https://bugs.launchpad.net/nova/+bug/911366).

    This cleanup task is a cross-hypervisor utility for finding these
    zombied instances and either logging the discrepancy (likely what you
    should do in production), or automatically reaping the instances (more
    appropriate for dev environments).

    An instance is acted upon only when it is marked deleted in the
    database, its name is reported by the driver, and its ``deleted_at``
    timestamp is either unset or older than
    FLAGS.running_deleted_instance_timeout seconds.

    :param context: request context used for the database lookups; its
                    ``read_deleted`` attribute is temporarily set to
                    "yes" while the task runs.
    :raises Exception: if an eligible instance is found and the configured
                       action is not one of 'noop', 'log' or 'reap'.
    """
    action = FLAGS.running_deleted_instance_action

    if action == "noop":
        return

    present_name_labels = set(self.driver.list_instances())

    # NOTE(sirp): admin contexts don't ordinarily return deleted records
    with utils.temporary_mutation(context, read_deleted="yes"):
        instances = self.db.instance_get_all_by_host(context, self.host)
        for instance in instances:
            present = instance.name in present_name_labels
            erroneously_running = instance.deleted and present
            # A missing deleted_at is treated as "old enough" so such
            # records are never skipped because of the timeout.
            old_enough = (not instance.deleted_at or utils.is_older_than(
                instance.deleted_at,
                FLAGS.running_deleted_instance_timeout))

            if not (erroneously_running and old_enough):
                continue

            instance_id = instance.id
            # Explicit mapping instead of locals(): only the values the
            # messages actually use are handed to the logger.
            msg_args = {'instance_id': instance_id,
                        'name_label': instance.name}

            if action == "log":
                LOG.warning(_("Detected instance %(instance_id)s with"
                              " name label '%(name_label)s' which is"
                              " marked as DELETED but still present on"
                              " host."), msg_args)

            elif action == 'reap':
                LOG.info(_("Destroying instance %(instance_id)s with"
                           " name label '%(name_label)s' which is"
                           " marked as DELETED but still present on"
                           " host."), msg_args)
                self._shutdown_instance(
                    context, instance, 'Terminating', True)
                self._cleanup_volumes(context, instance_id)
            else:
                # Interpolate the message; passing the mapping as a second
                # positional argument to Exception would leave the
                # placeholder unformatted.
                raise Exception(_("Unrecognized value '%(action)s'"
                                  " for FLAGS.running_deleted_"
                                  "instance_action") % {'action': action})
diff --git a/nova/utils.py b/nova/utils.py
index e6a6115fc..f4ee906fe 100644
--- a/nova/utils.py
+++ b/nova/utils.py
@@ -1180,3 +1180,31 @@ def read_cached_file(filename, cache_info):
cache_info['data'] = data
cache_info['mtime'] = mtime
return data
+
+
@contextlib.contextmanager
def temporary_mutation(obj, **kwargs):
    """Temporarily set attributes on an object, reverting them when finished.

    Each keyword argument names an attribute of ``obj`` and the value it
    should hold for the duration of the ``with`` block. On exit, whether
    normal or via an exception, every attribute is put back to its previous
    value; attributes that did not exist beforehand are removed again.

    One use of this is to temporarily set the read_deleted flag on a context
    object:

        with temporary_mutation(context, read_deleted="yes"):
            do_something_that_needed_deleted_objects()

    :param obj: object whose attributes are mutated in place.
    :param kwargs: attribute name -> temporary value.
    """
    # Sentinel distinguishing "attribute was absent" from "attribute was
    # None" (or any other legitimate value).
    NOT_PRESENT = object()

    old_values = {}
    for attr, new_value in kwargs.items():
        old_values[attr] = getattr(obj, attr, NOT_PRESENT)
        setattr(obj, attr, new_value)

    try:
        yield
    finally:
        for attr, old_value in old_values.items():
            if old_value is NOT_PRESENT:
                # The attribute was created by us via setattr, so it must
                # be removed with delattr; item deletion (del obj[attr])
                # would fail on ordinary objects and leak the attribute.
                delattr(obj, attr)
            else:
                setattr(obj, attr, old_value)