diff options
| author | Jenkins <jenkins@review.openstack.org> | 2012-01-03 19:00:14 +0000 |
|---|---|---|
| committer | Gerrit Code Review <review@openstack.org> | 2012-01-03 19:00:14 +0000 |
| commit | 8e217208e0ccc90ab2eb876d1662817f9346f9a2 (patch) | |
| tree | 8263601c474a954343b6bb6f36c0b83f89ba16ff | |
| parent | 3feee8a5184370f3e0d835d5d11a6ae62f722c24 (diff) | |
| parent | e695b8498c486d5b664d8e551e7182a102826cd2 (diff) | |
Merge "Adds running_deleted_instance_reaper task."
| -rw-r--r-- | nova/compute/manager.py | 73 | ||||
| -rw-r--r-- | nova/utils.py | 28 |
2 files changed, 101 insertions, 0 deletions
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index ab582114b..44d35b4a6 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -85,6 +85,17 @@ flags.DEFINE_integer("resize_confirm_window", 0,
                      " Set to 0 to disable.")
 flags.DEFINE_integer('host_state_interval', 120,
                      'Interval in seconds for querying the host status')
+flags.DEFINE_integer("running_deleted_instance_timeout", 0,
+                     "Number of seconds after being deleted when a"
+                     " still-running instance should be considered"
+                     " eligible for cleanup.")
+flags.DEFINE_integer("running_deleted_instance_poll_interval", 30,
+                     "Number of periodic scheduler ticks to wait between"
+                     " runs of the cleanup task.")
+flags.DEFINE_string("running_deleted_instance_action", "noop",
+                     "Action to take if a running deleted instance is"
+                     " detected. Valid options are 'noop', 'log', and"
+                     " 'reap'. Set to 'noop' to disable.")
 
 LOG = logging.getLogger('nova.compute.manager')
 
@@ -2046,3 +2057,65 @@ class ComputeManager(manager.SchedulerDependentManager):
                   'details': fault.message,
                   }
         self.db.instance_fault_create(context, values)
+
+    @manager.periodic_task(
+        ticks_between_runs=FLAGS.running_deleted_instance_poll_interval)
+    def _cleanup_running_deleted_instances(self, context):
+        """Cleanup any instances which are erroneously still running after
+        having been deleted.
+
+        Valid actions to take are:
+
+            1. noop - do nothing
+            2. log - log which instances are erroneously running
+            3. reap - shutdown and cleanup any erroneously running instances
+
+        The use-case for this cleanup task is: for various reasons, it may be
+        possible for the database to show an instance as deleted but for that
+        instance to still be running on a host machine (see bug
+        https://bugs.launchpad.net/nova/+bug/911366).
+
+        This cleanup task is a cross-hypervisor utility for finding these
+        zombied instances and either logging the discrepancy (likely what you
+        should do in production), or automatically reaping the instances (more
+        appropriate for dev environments).
+        """
+        action = FLAGS.running_deleted_instance_action
+
+        if action == "noop":
+            return
+
+        present_name_labels = set(self.driver.list_instances())
+
+        # NOTE(sirp): admin contexts don't ordinarily return deleted records
+        with utils.temporary_mutation(context, read_deleted="yes"):
+            instances = self.db.instance_get_all_by_host(context, self.host)
+            for instance in instances:
+                present = instance.name in present_name_labels
+                erroneously_running = instance.deleted and present
+                old_enough = (not instance.deleted_at or utils.is_older_than(
+                        instance.deleted_at,
+                        FLAGS.running_deleted_instance_timeout))
+
+                if erroneously_running and old_enough:
+                    instance_id = instance.id
+                    name_label = instance.name
+
+                    if action == "log":
+                        LOG.warning(_("Detected instance %(instance_id)s with"
+                                      " name label '%(name_label)s' which is"
+                                      " marked as DELETED but still present on"
+                                      " host."), locals())
+
+                    elif action == 'reap':
+                        LOG.info(_("Destroying instance %(instance_id)s with"
+                                   " name label '%(name_label)s' which is"
+                                   " marked as DELETED but still present on"
+                                   " host."), locals())
+                        self._shutdown_instance(
+                                context, instance, 'Terminating', True)
+                        self._cleanup_volumes(context, instance_id)
+                    else:
+                        raise Exception(_("Unrecognized value '%(action)s'"
+                                          " for FLAGS.running_deleted_"
+                                          "instance_action") % locals())
diff --git a/nova/utils.py b/nova/utils.py
index e6a6115fc..f4ee906fe 100644
--- a/nova/utils.py
+++ b/nova/utils.py
@@ -1180,3 +1180,31 @@ def read_cached_file(filename, cache_info):
         cache_info['data'] = data
         cache_info['mtime'] = mtime
     return data
+
+
+@contextlib.contextmanager
+def temporary_mutation(obj, **kwargs):
+    """Temporarily set the attr on a particular object to a given value then
+    revert when finished.
+
+    One use of this is to temporarily set the read_deleted flag on a context
+    object:
+
+        with temporary_mutation(context, read_deleted="yes"):
+            do_something_that_needed_deleted_objects()
+    """
+    NOT_PRESENT = object()
+
+    old_values = {}
+    for attr, new_value in kwargs.items():
+        old_values[attr] = getattr(obj, attr, NOT_PRESENT)
+        setattr(obj, attr, new_value)
+
+    try:
+        yield
+    finally:
+        for attr, old_value in old_values.items():
+            if old_value is NOT_PRESENT:
+                delattr(obj, attr)  # attr was absent before; remove it again
+            else:
+                setattr(obj, attr, old_value)
