From 8de35027ea4a1c0d58915ff0c67fb7f65700cd07 Mon Sep 17 00:00:00 2001
From: Dan Smith
Date: Mon, 8 Apr 2013 16:15:44 -0700
Subject: Optimize some of compute/manager's periodic tasks' DB queries

Most of these tasks do not need to fetch instances with a full
complement of metadata items. Make them say "no thanks" to the DB API
to reduce the amount of work that needs to be done.

Note some refactoring was done in the _get_instance_nw_info() method
to pull this information if it ended up being necessary. The cases
where this would happen are error-cleanup scenarios, and thus taking
the extra hit in that case is an overall win for not having to do it
all the time.

Related to bug 1164737

Change-Id: Ic7d6290c0b7edf84cbf8bdc64e29f206619f97e5
---
 nova/compute/manager.py            | 39 ++++++++++++++++++++++++++------------
 nova/conductor/api.py              |  4 ++--
 nova/tests/compute/test_compute.py | 21 ++++++++++++++------
 3 files changed, 44 insertions(+), 20 deletions(-)

diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index f6836d361..ed9a50ec8 100755
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -396,7 +396,8 @@ class ComputeManager(manager.SchedulerDependentManager):
         try:
             driver_uuids = self.driver.list_instance_uuids()
             local_instances = self.conductor_api.instance_get_all_by_filters(
-                context, {'uuid': driver_uuids})
+                context, {'uuid': driver_uuids},
+                columns_to_join=[])
             local_instance_uuids = [inst['uuid'] for inst in local_instances]
             for uuid in set(driver_uuids) - set(local_instance_uuids):
                 LOG.error(_('Instance %(uuid)s found in the hypervisor, but '
@@ -408,8 +409,8 @@ class ComputeManager(manager.SchedulerDependentManager):
             # The driver doesn't support uuids listing, so we'll have
             # to brute force.
             driver_instances = self.driver.list_instances()
-            instances = self.conductor_api.instance_get_all_by_host(context,
-                                                                    self.host)
+            instances = self.conductor_api.instance_get_all_by_host(
+                context, self.host, columns_to_join=[])
             name_map = dict((instance['name'], instance) for instance
                             in instances)
             local_instances = []
             for driver_instance in driver_instances:
@@ -703,6 +704,15 @@ class ComputeManager(manager.SchedulerDependentManager):
 
     def _get_instance_nw_info(self, context, instance):
         """Get a list of dictionaries of network data of an instance."""
+        if (not hasattr(instance, 'system_metadata') or
+                len(instance['system_metadata']) == 0):
+            # NOTE(danms): Several places in the code look up instances without
+            # pulling system_metadata for performance, and call this function.
+            # If we get an instance without it, re-fetch so that the call
+            # to network_api (which requires it for instance_type) will
+            # succeed.
+            instance = self.conductor_api.instance_get_by_uuid(
+                context, instance['uuid'])
         network_info = self.network_api.get_instance_nw_info(context,
             instance, conductor_api=self.conductor_api)
         return network_info
@@ -3423,7 +3433,7 @@ class ComputeManager(manager.SchedulerDependentManager):
         else:
             # No more in our copy of uuids.  Pull from the DB.
             db_instances = self.conductor_api.instance_get_all_by_host(
-                context, self.host)
+                context, self.host, columns_to_join=[])
             if not db_instances:
                 # None.. just return.
                 return
@@ -3453,8 +3463,8 @@ class ComputeManager(manager.SchedulerDependentManager):
     @manager.periodic_task
     def _poll_rescued_instances(self, context):
         if CONF.rescue_timeout > 0:
-            instances = self.conductor_api.instance_get_all_by_host(context,
-                                                                    self.host)
+            instances = self.conductor_api.instance_get_all_by_host(
+                context, self.host, columns_to_join=[])
 
             rescued_instances = []
             for instance in instances:
@@ -3585,8 +3595,8 @@ class ComputeManager(manager.SchedulerDependentManager):
             self._last_bw_usage_poll = curr_time
             LOG.info(_("Updating bandwidth usage cache"))
 
-            instances = self.conductor_api.instance_get_all_by_host(context,
-                                                                    self.host)
+            instances = self.conductor_api.instance_get_all_by_host(
+                context, self.host, columns_to_join=[])
             try:
                 bw_counters = self.driver.get_all_bw_counters(instances)
             except NotImplementedError:
@@ -3738,8 +3748,8 @@ class ComputeManager(manager.SchedulerDependentManager):
         loop, one database record at a time, checking if the hypervisor has
         the same power state as is in the database.
         """
-        db_instances = self.conductor_api.instance_get_all_by_host(context,
-                                                                   self.host)
+        db_instances = self.conductor_api.instance_get_all_by_host(
+            context, self.host, columns_to_join=[])
 
         num_vm_instances = self.driver.get_num_instances()
         num_db_instances = len(db_instances)
@@ -3895,8 +3905,8 @@ class ComputeManager(manager.SchedulerDependentManager):
             LOG.debug(_("CONF.reclaim_instance_interval <= 0, skipping..."))
             return
 
-        instances = self.conductor_api.instance_get_all_by_host(context,
-                                                                self.host)
+        instances = self.conductor_api.instance_get_all_by_host(
+            context, self.host, columns_to_join=[])
         for instance in instances:
             old_enough = (not instance['deleted_at'] or
                           timeutils.is_older_than(instance['deleted_at'],
@@ -3907,6 +3917,11 @@ class ComputeManager(manager.SchedulerDependentManager):
                 capi = self.conductor_api
                 bdms = capi.block_device_mapping_get_all_by_instance(
                     context, instance)
+                # NOTE(danms): We fetched instances above without the
+                # system_metadata for efficiency. If we get here, we need
+                # to re-fetch with it so that _delete_instance() can extract
+                # instance_type information.
+                instance = capi.instance_get_by_uuid(context, instance['uuid'])
                 LOG.info(_('Reclaiming deleted instance'), instance=instance)
                 # NOTE(comstud): Quotas were already accounted for when
                 # the instance was soft deleted, so there's no need to
diff --git a/nova/conductor/api.py b/nova/conductor/api.py
index 1a0d24d66..a0b94b549 100644
--- a/nova/conductor/api.py
+++ b/nova/conductor/api.py
@@ -77,8 +77,8 @@ class LocalAPI(object):
         return self._manager.instance_get_all(context)
 
     def instance_get_all_by_host(self, context, host, columns_to_join=None):
-        return self._manager.instance_get_all_by_host(context, host,
-                                                      columns_to_join)
+        return self._manager.instance_get_all_by_host(
+            context, host, columns_to_join=columns_to_join)
 
     def instance_get_all_by_host_and_node(self, context, host, node):
         return self._manager.instance_get_all_by_host(context, host, node)
diff --git a/nova/tests/compute/test_compute.py b/nova/tests/compute/test_compute.py
index 612bc5969..8ef7f9311 100644
--- a/nova/tests/compute/test_compute.py
+++ b/nova/tests/compute/test_compute.py
@@ -3613,14 +3613,18 @@ class ComputeTestCase(BaseTestCase):
     def test_get_instance_nw_info(self):
         fake_network.unset_stub_network_methods(self.stubs)
 
-        fake_instance = 'fake-instance'
+        fake_instance = {'uuid': 'fake-instance'}
         fake_nw_info = network_model.NetworkInfo()
 
         self.mox.StubOutWithMock(self.compute.network_api,
                                  'get_instance_nw_info')
         self.mox.StubOutWithMock(self.compute.conductor_api,
                                  'instance_info_cache_update')
+        self.mox.StubOutWithMock(self.compute.conductor_api,
+                                 'instance_get_by_uuid')
 
+        self.compute.conductor_api.instance_get_by_uuid(
+            self.context, fake_instance['uuid']).AndReturn(fake_instance)
         self.compute.network_api.get_instance_nw_info(self.context,
                 fake_instance, conductor_api=self.compute.conductor_api
                 ).AndReturn(fake_nw_info)
@@ -3646,8 +3650,9 @@ class ComputeTestCase(BaseTestCase):
         call_info = {'get_all_by_host': 0, 'get_by_uuid': 0,
                      'get_nw_info': 0, 'expected_instance': None}
 
-        def fake_instance_get_all_by_host(context, host):
+        def fake_instance_get_all_by_host(context, host, columns_to_join):
             call_info['get_all_by_host'] += 1
+            self.assertEqual(columns_to_join, [])
             return instances[:]
 
         def fake_instance_get_by_uuid(context, instance_uuid):
@@ -3723,7 +3728,8 @@ class ComputeTestCase(BaseTestCase):
                       'launched_at': not_timed_out_time}]
         unrescued_instances = {'fake_uuid1': False, 'fake_uuid4': False}
 
-        def fake_instance_get_all_by_host(context, host):
+        def fake_instance_get_all_by_host(context, host, columns_to_join):
+            self.assertEqual(columns_to_join, [])
             return instances
 
         def fake_unrescue(self, context, instance):
@@ -4342,8 +4348,10 @@ class ComputeTestCase(BaseTestCase):
         self.compute.driver.list_instance_uuids().AndReturn(
             [inst['uuid'] for inst in driver_instances])
         self.compute.conductor_api.instance_get_all_by_filters(
-            fake_context, {'uuid': [inst['uuid'] for
-                                    inst in driver_instances]}).AndReturn(
+            fake_context,
+            {'uuid': [inst['uuid'] for
+                      inst in driver_instances]},
+            columns_to_join=[]).AndReturn(
                 driver_instances)
 
         self.mox.ReplayAll()
@@ -4377,7 +4385,8 @@ class ComputeTestCase(BaseTestCase):
         self.compute.driver.list_instances().AndReturn(
             [inst['name'] for inst in driver_instances])
         self.compute.conductor_api.instance_get_all_by_host(
-            fake_context, self.compute.host).AndReturn(all_instances)
+            fake_context, self.compute.host,
+            columns_to_join=[]).AndReturn(all_instances)
 
         self.mox.ReplayAll()
-- 
cgit
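Note on the pattern (illustrative sketch, not part of the patch itself): the
change pairs a cheap bulk query that skips joining metadata columns with a
targeted re-fetch of a single instance when its full record is actually
needed. A minimal standalone illustration, assuming a conductor API with the
signatures used above; reclaim_instance() is a hypothetical stand-in for the
real delete path:

    def reclaim_deleted_sketch(context, conductor_api, host):
        # Cheap bulk query: columns_to_join=[] skips joining the
        # (system_)metadata rows, as the periodic tasks above now do.
        instances = conductor_api.instance_get_all_by_host(
            context, host, columns_to_join=[])
        for instance in instances:
            # Re-fetch one row with full metadata only on the rare path
            # that needs instance_type information.
            instance = conductor_api.instance_get_by_uuid(
                context, instance['uuid'])
            reclaim_instance(context, instance)  # hypothetical helper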