From c3568f9a1a28b269d88d2dcc175e6b5a2f7abf37 Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Thu, 4 Apr 2013 12:57:32 -0700 Subject: Replace metadata joins with another query Right now, we always join metadata and system_metadata when we query for instances. This inflates the number of rows returned by a factor of ten at a minimum, and over 30 in some situations. This patch avoids the direct join of these tables in favor of a "manual join" which performs additional queries to fill the needed data. Latency may be increased, but with the gain of a potentially significant amount of reduction in data transfer. This also provides the ability for the queries which utilize it to selectively decide whether either or both of these additional fills are performed, paving the way for future high-level optimizations. Related to bug 1164737 Change-Id: I0610fb16ccce2ee95c318589c8abcc30613a3fe9 --- nova/db/api.py | 6 ++- nova/db/sqlalchemy/api.py | 119 ++++++++++++++++++++++++++++++++++++---------- nova/tests/test_db_api.py | 48 ++++++++++++++++++- 3 files changed, 144 insertions(+), 29 deletions(-) diff --git a/nova/db/api.py b/nova/db/api.py index bae086829..19a5e1f5f 100644 --- a/nova/db/api.py +++ b/nova/db/api.py @@ -604,11 +604,13 @@ def instance_get_all(context, columns_to_join=None): def instance_get_all_by_filters(context, filters, sort_key='created_at', - sort_dir='desc', limit=None, marker=None): + sort_dir='desc', limit=None, marker=None, + columns_to_join=None): """Get all instances that match all filters.""" return IMPL.instance_get_all_by_filters(context, filters, sort_key, sort_dir, limit=limit, - marker=marker) + marker=marker, + columns_to_join=columns_to_join) def instance_get_active_by_window_joined(context, begin, end=None, diff --git a/nova/db/sqlalchemy/api.py b/nova/db/sqlalchemy/api.py index 65d762a7f..035779e41 100644 --- a/nova/db/sqlalchemy/api.py +++ b/nova/db/sqlalchemy/api.py @@ -1554,11 +1554,57 @@ def _build_instance_get(context, session=None): options(joinedload('system_metadata')) +def _instances_fill_metadata(context, instances, manual_joins=None): + """Selectively fill instances with manually-joined metadata. Note that + instance will be converted to a dict. + + :param context: security context + :param instances: list of instances to fill + :param manual_joins: list of tables to manually join (can be any + combination of 'metadata' and 'system_metadata' or + None to take the default of both) + """ + uuids = [inst['uuid'] for inst in instances] + + if manual_joins is None: + manual_joins = ['metadata', 'system_metadata'] + + meta = collections.defaultdict(list) + if 'metadata' in manual_joins: + for row in _instance_metadata_get_multi(context, uuids): + meta[row['instance_uuid']].append(row) + + sys_meta = collections.defaultdict(list) + if 'system_metadata' in manual_joins: + for row in _instance_system_metadata_get_multi(context, uuids): + sys_meta[row['instance_uuid']].append(row) + + filled_instances = [] + for inst in instances: + inst = dict(inst.iteritems()) + inst['system_metadata'] = sys_meta[inst['uuid']] + inst['metadata'] = meta[inst['uuid']] + filled_instances.append(inst) + + return filled_instances + + +def _manual_join_columns(columns_to_join): + manual_joins = [] + for column in ('metadata', 'system_metadata'): + if column in columns_to_join: + columns_to_join.remove(column) + manual_joins.append(column) + return manual_joins, columns_to_join + + @require_context def instance_get_all(context, columns_to_join=None): if columns_to_join is None: - columns_to_join = ['info_cache', 'security_groups', 'metadata', - 'system_metadata'] + columns_to_join = ['info_cache', 'security_groups', 'metadata'] + manual_joins = ['metadata', 'system_metadata'] + else: + manual_joins, columns_to_join = _manual_join_columns(columns_to_join) query = model_query(context, models.Instance) for column in columns_to_join: query = query.options(joinedload(column)) @@ -1568,12 +1614,14 @@ def instance_get_all(context, columns_to_join=None): query = query.filter_by(project_id=context.project_id) else: query = query.filter_by(user_id=context.user_id) - return query.all() + instances = query.all() + return _instances_fill_metadata(context, instances, manual_joins) @require_context def instance_get_all_by_filters(context, filters, sort_key, sort_dir, - limit=None, marker=None, session=None): + limit=None, marker=None, columns_to_join=None, + session=None): """Return instances that match all filters. Deleted instances will be returned by default, unless there's a filter that says otherwise""" @@ -1583,12 +1631,18 @@ def instance_get_all_by_filters(context, filters, sort_key, sort_dir, if not session: session = get_session() - query_prefix = session.query(models.Instance).\ - options(joinedload('info_cache')).\ - options(joinedload('security_groups')).\ - options(joinedload('system_metadata')).\ - options(joinedload('metadata')).\ - order_by(sort_fn[sort_dir](getattr(models.Instance, sort_key))) + if columns_to_join is None: + columns_to_join = ['info_cache', 'security_groups'] + manual_joins = ['metadata', 'system_metadata'] + else: + manual_joins, columns_to_join = _manual_join_columns(columns_to_join) + + query_prefix = session.query(models.Instance) + for column in columns_to_join: + query_prefix = query_prefix.options(joinedload(column)) + + query_prefix = query_prefix.order_by(sort_fn[sort_dir]( + getattr(models.Instance, sort_key))) # Make a copy of the filters dictionary to use going forward, as we'll # be modifying it and we shouldn't affect the caller's use of it. @@ -1646,8 +1700,7 @@ def instance_get_all_by_filters(context, filters, sort_key, sort_dir, marker=marker, sort_dir=sort_dir) - instances = query_prefix.all() - return instances + return _instances_fill_metadata(context, query_prefix.all(), manual_joins) def tag_filter(query, model, tag_model, tag_model_col, filters): @@ -1765,8 +1818,6 @@ def instance_get_active_by_window_joined(context, begin, end=None, query = query.options(joinedload('info_cache')).\ options(joinedload('security_groups')).\ - options(joinedload('metadata')).\ - options(joinedload('system_metadata')).\ filter(or_(models.Instance.terminated_at == None, models.Instance.terminated_at > begin)) if end: @@ -1776,14 +1827,13 @@ def instance_get_active_by_window_joined(context, begin, end=None, if host: query = query.filter_by(host=host) - return query.all() + return _instances_fill_metadata(context, query.all()) @require_admin_context def _instance_get_all_query(context, project_only=False, joins=None): if joins is None: - joins = ['info_cache', 'security_groups', 'metadata', - 'system_metadata'] + joins = ['info_cache', 'security_groups'] query = model_query(context, models.Instance, project_only=project_only) for join in joins: @@ -1793,19 +1843,22 @@ def _instance_get_all_query(context, project_only=False, joins=None): @require_admin_context def instance_get_all_by_host(context, host): - return _instance_get_all_query(context).filter_by(host=host).all() + return _instances_fill_metadata(context, + _instance_get_all_query(context).filter_by(host=host).all()) @require_admin_context def instance_get_all_by_host_and_node(context, host, node): - return _instance_get_all_query(context, joins=[]).filter_by(host=host).\ - filter_by(node=node).all() + return _instances_fill_metadata(context, + _instance_get_all_query(context, joins=[]).filter_by(host=host). + filter_by(node=node).all(), manual_joins=[]) @require_admin_context def instance_get_all_by_host_and_not_type(context, host, type_id=None): - return _instance_get_all_query(context).filter_by(host=host).\ - filter(models.Instance.instance_type_id != type_id).all() + return _instances_fill_metadata(context, + _instance_get_all_query(context).filter_by(host=host). + filter(models.Instance.instance_type_id != type_id).all()) # NOTE(jkoelker) This is only being left here for compat with floating @@ -1849,10 +1902,10 @@ def instance_get_all_hung_in_rebooting(context, reboot_window): reboot_window = (timeutils.utcnow() - datetime.timedelta(seconds=reboot_window)) - return model_query(context, models.Instance).\ - options(joinedload('system_metadata')).\ - filter(models.Instance.updated_at <= reboot_window).\ - filter_by(task_state=task_states.REBOOTING).all() + return _instances_fill_metadata(context, + model_query(context, models.Instance). + filter(models.Instance.updated_at <= reboot_window). + filter_by(task_state=task_states.REBOOTING).all()) @require_context @@ -3794,6 +3847,13 @@ def cell_get_all(context): ######################## # User-provided metadata +def _instance_metadata_get_multi(context, instance_uuids, session=None): + return model_query(context, models.InstanceMetadata, + session=session).\ + filter( + models.InstanceMetadata.instance_uuid.in_(instance_uuids)) + + def _instance_metadata_get_query(context, instance_uuid, session=None): return model_query(context, models.InstanceMetadata, session=session, read_deleted="no").\ @@ -3919,6 +3979,13 @@ def instance_metadata_update(context, instance_uuid, metadata, delete, # System-owned metadata +def _instance_system_metadata_get_multi(context, instance_uuids, session=None): + return model_query(context, models.InstanceSystemMetadata, + session=session).\ + filter( + models.InstanceSystemMetadata.instance_uuid.in_(instance_uuids)) + + def _instance_system_metadata_get_query(context, instance_uuid, session=None): return model_query(context, models.InstanceSystemMetadata, session=session).\ diff --git a/nova/tests/test_db_api.py b/nova/tests/test_db_api.py index b81b04be9..ad295e9a7 100644 --- a/nova/tests/test_db_api.py +++ b/nova/tests/test_db_api.py @@ -65,6 +65,20 @@ class DbTestCase(test.TestCase): args.update(kwargs) return db.instance_create(ctxt, args) + def fake_metadata(self, content): + meta = {} + for i in range(0, 10): + meta["foo%i" % i] = "this is %s item %i" % (content, i) + return meta + + def create_metadata_for_instance(self, instance_uuid): + meta = self.fake_metadata('metadata') + db.instance_metadata_update(self.context, instance_uuid, meta, False) + sys_meta = self.fake_metadata('system_metadata') + db.instance_system_metadata_update(self.context, instance_uuid, + sys_meta, False) + return meta, sys_meta + class DbApiTestCase(DbTestCase): def test_create_instance_unique_hostname(self): @@ -132,6 +146,37 @@ class DbApiTestCase(DbTestCase): check_exc_format(db.get_ec2_instance_id_by_uuid) check_exc_format(db.get_instance_uuid_by_ec2_id) + def test_instance_get_all_with_meta(self): + inst = self.create_instances_with_args() + fake_meta, fake_sys = self.create_metadata_for_instance(inst['uuid']) + result = db.instance_get_all(self.context) + for inst in result: + meta = utils.metadata_to_dict(inst['metadata']) + self.assertEqual(meta, fake_meta) + sys_meta = utils.metadata_to_dict(inst['system_metadata']) + self.assertEqual(sys_meta, fake_sys) + + def test_instance_get_all_by_filters_with_meta(self): + inst = self.create_instances_with_args() + fake_meta, fake_sys = self.create_metadata_for_instance(inst['uuid']) + result = db.instance_get_all_by_filters(self.context, {}) + for inst in result: + meta = utils.metadata_to_dict(inst['metadata']) + self.assertEqual(meta, fake_meta) + sys_meta = utils.metadata_to_dict(inst['system_metadata']) + self.assertEqual(sys_meta, fake_sys) + + def test_instance_get_all_by_filters_without_meta(self): + inst = self.create_instances_with_args() + fake_meta, fake_sys = self.create_metadata_for_instance(inst['uuid']) + result = db.instance_get_all_by_filters(self.context, {}, + columns_to_join=[]) + for inst in result: + meta = utils.metadata_to_dict(inst['metadata']) + self.assertEqual(meta, {}) + sys_meta = utils.metadata_to_dict(inst['system_metadata']) + self.assertEqual(sys_meta, {}) + def test_instance_get_all_by_filters(self): self.create_instances_with_args() self.create_instances_with_args() @@ -183,7 +228,8 @@ class DbApiTestCase(DbTestCase): self.assertEqual(1, len(instances)) instance = instances[0] self.assertEqual(expected['uuid'], instance['uuid']) - self.assertFalse('system_metadata' in dict(instance)) + sysmeta = dict(instance)['system_metadata'] + self.assertEqual(len(sysmeta), 0) def test_migration_get_unconfirmed_by_dest_compute(self): ctxt = context.get_admin_context() -- cgit