| author | Hans Lindgren <hanlind@kth.se> | 2013-04-17 15:52:56 +0200 |
|---|---|---|
| committer | Hans Lindgren <hanlind@kth.se> | 2013-05-08 15:10:17 +0200 |
| commit | 0f56d8ddb02f54ae389380dcd0790e55f2dcb479 (patch) | |
| tree | 8cdcb69a616ea10b75390c2c8fb30ba5905a83c0 /nova/compute | |
| parent | 36b10384724fec9657784980cd2bd38e72b445bc (diff) | |
Optimize instance queries in compute manager
Several instance queries made against the database through the conductor
can be narrowed with filters so that only the instances each job actually
needs are returned. Most of these queries run in periodic tasks, so the
savings are significant.
Compute methods where queries can be made more efficient:
_get_instances_on_driver
_poll_rebooting_instances
_poll_rescued_instances
_reclaim_queued_deletes
_run_image_cache_manager_pass
Resolves bug 1169970.
Change-Id: I7c2fab48944e34765b3fff8ce10bc64a5cd826c8
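
As an editorial illustration of the pattern this change adopts (not part of the commit itself): instead of pulling every instance on the host through the conductor and discarding most of them in Python, each periodic task now builds a filter dict so the database query only returns the rows the task needs. The sketch below condenses the `_poll_rescued_instances` change into two standalone functions; `conductor_api`, `context`, and `host` are hypothetical parameters standing in for the attributes a real `ComputeManager` instance supplies.

```python
from nova.compute import vm_states


def rescued_instances_old(conductor_api, context, host):
    # Old approach: fetch every instance on this host across the conductor,
    # then keep only the RESCUED ones in Python.
    instances = conductor_api.instance_get_all_by_host(
        context, host, columns_to_join=[])
    return [i for i in instances if i['vm_state'] == vm_states.RESCUED]


def rescued_instances_new(conductor_api, context, host):
    # New approach: push the vm_state/host filtering into the query itself,
    # so only the rescued instances ever leave the database.
    filters = {'vm_state': vm_states.RESCUED, 'host': host}
    return conductor_api.instance_get_all_by_filters(
        context, filters, columns_to_join=[])
```

The same filter-dict shape (`vm_state`, `task_state`, `host`, `deleted`, `soft_deleted`, `uuid`) is what the other periodic tasks in the diff below now assemble before calling `instance_get_all_by_filters`.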
Diffstat (limited to 'nova/compute')
| file mode | path | lines changed |
|---|---|---|
| -rwxr-xr-x | nova/compute/manager.py | 93 |
1 file changed, 44 insertions, 49 deletions
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index dc0725933..d59e4a1f0 100755
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -393,15 +393,17 @@ class ComputeManager(manager.SchedulerDependentManager):
                       'trying to set it to ERROR'),
                   instance_uuid=instance_uuid)
 
-    def _get_instances_on_driver(self, context):
+    def _get_instances_on_driver(self, context, filters=None):
         """Return a list of instance records that match the instances found
         on the hypervisor.
         """
+        if not filters:
+            filters = {}
         try:
             driver_uuids = self.driver.list_instance_uuids()
+            filters['uuid'] = driver_uuids
             local_instances = self.conductor_api.instance_get_all_by_filters(
-                context, {'uuid': driver_uuids},
-                columns_to_join=[])
+                context, filters, columns_to_join=[])
             local_instance_uuids = [inst['uuid'] for inst in local_instances]
             for uuid in set(driver_uuids) - set(local_instance_uuids):
                 LOG.error(_('Instance %(uuid)s found in the hypervisor, but '
@@ -413,8 +415,8 @@ class ComputeManager(manager.SchedulerDependentManager):
         # The driver doesn't support uuids listing, so we'll have
         # to brute force.
         driver_instances = self.driver.list_instances()
-        instances = self.conductor_api.instance_get_all_by_host(
-            context, self.host, columns_to_join=[])
+        instances = self.conductor_api.instance_get_all_by_filters(
+            context, filters, columns_to_join=[])
         name_map = dict((instance['name'], instance) for instance in instances)
         local_instances = []
         for driver_instance in driver_instances:
@@ -436,11 +438,11 @@ class ComputeManager(manager.SchedulerDependentManager):
         not, destroy them.
         """
         our_host = self.host
-        local_instances = self._get_instances_on_driver(context)
+        filters = {'deleted': False}
+        local_instances = self._get_instances_on_driver(context, filters)
         for instance in local_instances:
             instance_host = instance['host']
-            instance_name = instance['name']
-            if instance['host'] != our_host:
+            if instance_host != our_host:
                 LOG.info(_('Deleting instance as its host ('
                            '%(instance_host)s) is not equal to our '
                            'host (%(our_host)s).'),
@@ -3464,21 +3466,26 @@ class ComputeManager(manager.SchedulerDependentManager):
     @manager.periodic_task
     def _poll_rebooting_instances(self, context):
         if CONF.reboot_timeout > 0:
-            instances = self.conductor_api.instance_get_all_hung_in_rebooting(
-                context, CONF.reboot_timeout)
-            self.driver.poll_rebooting_instances(CONF.reboot_timeout,
-                                                 instances)
+            filters = {'task_state': task_states.REBOOTING,
+                       'host': self.host}
+            rebooting = self.conductor_api.instance_get_all_by_filters(
+                context, filters, columns_to_join=[])
+
+            to_poll = []
+            for instance in rebooting:
+                if timeutils.is_older_than(instance['updated_at'],
+                                           CONF.reboot_timeout):
+                    to_poll.append(instance)
+
+            self.driver.poll_rebooting_instances(CONF.reboot_timeout, to_poll)
 
     @manager.periodic_task
     def _poll_rescued_instances(self, context):
         if CONF.rescue_timeout > 0:
-            instances = self.conductor_api.instance_get_all_by_host(
-                context, self.host, columns_to_join=[])
-
-            rescued_instances = []
-            for instance in instances:
-                if instance['vm_state'] == vm_states.RESCUED:
-                    rescued_instances.append(instance)
+            filters = {'vm_state': vm_states.RESCUED,
+                       'host': self.host}
+            rescued_instances = self.conductor_api.instance_get_all_by_filters(
+                context, filters, columns_to_join=[])
 
             to_unrescue = []
             for instance in rescued_instances:
@@ -3915,23 +3922,15 @@ class ComputeManager(manager.SchedulerDependentManager):
             LOG.debug(_("CONF.reclaim_instance_interval <= 0, skipping..."))
             return
 
-        instances = self.conductor_api.instance_get_all_by_host(
-            context, self.host, columns_to_join=[])
+        filters = {'vm_state': vm_states.SOFT_DELETED,
+                   'host': self.host}
+        instances = self.conductor_api.instance_get_all_by_filters(context,
+                                                                   filters)
         for instance in instances:
-            old_enough = (not instance['deleted_at'] or
-                          timeutils.is_older_than(instance['deleted_at'],
-                                                  interval))
-            soft_deleted = instance['vm_state'] == vm_states.SOFT_DELETED
-
-            if soft_deleted and old_enough:
+            if self._deleted_old_enough(instance, interval):
                 capi = self.conductor_api
                 bdms = capi.block_device_mapping_get_all_by_instance(
                     context, instance)
-                # NOTE(danms): We fetched instances above without the
-                # system_metadata for efficiency. If we get here, we need
-                # to re-fetch with it so that _delete_instace() can extract
-                # instance_type information.
-                instance = capi.instance_get_by_uuid(context, instance['uuid'])
                 LOG.info(_('Reclaiming deleted instance'), instance=instance)
                 # NOTE(comstud): Quotas were already accounted for when
                 # the instance was soft deleted, so there's no need to
@@ -4033,18 +4032,15 @@ class ComputeManager(manager.SchedulerDependentManager):
            but the hypervisor thinks is still running.
         """
         timeout = CONF.running_deleted_instance_timeout
+        filters = {'deleted': True,
+                   'soft_deleted': False,
+                   'host': self.host}
+        instances = self._get_instances_on_driver(context, filters)
+        return [i for i in instances if self._deleted_old_enough(i, timeout)]
 
-        def deleted_instance(instance):
-            erroneously_running = instance['deleted']
-            old_enough = (not instance['deleted_at'] or
-                          timeutils.is_older_than(instance['deleted_at'],
-                                                  timeout))
-            if erroneously_running and old_enough:
-                return True
-            return False
-
-        instances = self._get_instances_on_driver(context)
-        return [i for i in instances if deleted_instance(i)]
+    def _deleted_old_enough(self, instance, timeout):
+        return (not instance['deleted_at'] or
+                timeutils.is_older_than(instance['deleted_at'], timeout))
 
     @contextlib.contextmanager
     def _error_out_instance_on_exception(self, context, instance_uuid,
@@ -4104,8 +4100,6 @@ class ComputeManager(manager.SchedulerDependentManager):
         if CONF.image_cache_manager_interval == 0:
             return
 
-        all_instances = self.conductor_api.instance_get_all(context)
-
         # Determine what other nodes use this storage
         storage_users.register_storage_use(CONF.instances_path, CONF.host)
         nodes = storage_users.get_storage_users(CONF.instances_path)
@@ -4115,9 +4109,10 @@ class ComputeManager(manager.SchedulerDependentManager):
         # TODO(mikal): this should be further refactored so that the cache
         # cleanup code doesn't know what those instances are, just a remote
         # count, and then this logic should be pushed up the stack.
-        filtered_instances = []
-        for instance in all_instances:
-            if instance['host'] in nodes:
-                filtered_instances.append(instance)
+        filters = {'deleted': False,
+                   'soft_deleted': True,
+                   'host': nodes}
+        filtered_instances = self.conductor_api.instance_get_all_by_filters(
+            context, filters, columns_to_join=[])
         self.driver.manage_image_cache(context, filtered_instances)
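
For the deleted-instance cleanup paths, two pieces work together: `_get_instances_on_driver()` now accepts a caller-supplied filter dict (and merges the driver's UUID list into it), and the new `_deleted_old_enough()` helper centralizes the age check. The sketch below is a rough standalone restatement of that pairing, not the patch itself; the function names are hypothetical, the `manager` parameter stands in for `self`, and the `nova.openstack.common.timeutils` import path is assumed from the Grizzly/Havana-era tree.

```python
from nova.openstack.common import timeutils  # assumed Grizzly-era import path


def deleted_old_enough(instance, timeout):
    # Mirrors the new _deleted_old_enough() helper: an instance with no
    # deleted_at timestamp counts as old enough; otherwise deleted_at must
    # be older than `timeout` seconds.
    return (not instance['deleted_at'] or
            timeutils.is_older_than(instance['deleted_at'], timeout))


def running_deleted_instances(manager, context, timeout):
    # Hypothetical free-function version of the reworked periodic task:
    # the deleted/soft_deleted/host conditions move into the filter dict
    # handed to _get_instances_on_driver(), while the age comparison stays
    # in Python, as in the patch.
    filters = {'deleted': True,
               'soft_deleted': False,
               'host': manager.host}
    instances = manager._get_instances_on_driver(context, filters)
    return [i for i in instances if deleted_old_enough(i, timeout)]
```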
