diff options
| author | Jenkins <jenkins@review.openstack.org> | 2013-02-15 13:20:38 +0000 |
|---|---|---|
| committer | Gerrit Code Review <review@openstack.org> | 2013-02-15 13:20:38 +0000 |
| commit | 2ecf9e8bf11fb6ee930b47b0e64e26624b965af3 (patch) | |
| tree | dacf07fbc3a5865c4ef276eafdc50f7b6c8f4b1b /nova/db | |
| parent | 46fc860dc1b339fde70dbe3e8d3d75abebb4144e (diff) | |
| parent | 961d615ce63002d99cd31d03c8c97228d9e453d3 (diff) | |
Merge "Allow archiving deleted rows to shadow tables, for performance."
Diffstat (limited to 'nova/db')
| -rw-r--r-- | nova/db/api.py | 22 | ||||
| -rw-r--r-- | nova/db/sqlalchemy/api.py | 99 | ||||
| -rw-r--r-- | nova/db/sqlalchemy/migrate_repo/versions/154_add_shadow_tables.py | 77 |
3 files changed, 198 insertions, 0 deletions
diff --git a/nova/db/api.py b/nova/db/api.py index b07cd6b8b..6ec0b3a95 100644 --- a/nova/db/api.py +++ b/nova/db/api.py @@ -1715,3 +1715,25 @@ def task_log_get(context, task_name, period_beginning, period_ending, host, state=None): return IMPL.task_log_get(context, task_name, period_beginning, period_ending, host, state) + + +#################### + + +def archive_deleted_rows(context, max_rows=None): + """Move up to max_rows rows from production tables to corresponding shadow + tables. + + :returns: number of rows archived. + """ + return IMPL.archive_deleted_rows(context, max_rows=max_rows) + + +def archive_deleted_rows_for_table(context, tablename, max_rows=None): + """Move up to max_rows rows from tablename to corresponding shadow + table. + + :returns: number of rows archived. + """ + return IMPL.archive_deleted_rows_for_table(context, tablename, + max_rows=max_rows) diff --git a/nova/db/sqlalchemy/api.py b/nova/db/sqlalchemy/api.py index 96e7c6255..eb9181fce 100644 --- a/nova/db/sqlalchemy/api.py +++ b/nova/db/sqlalchemy/api.py @@ -26,13 +26,20 @@ import functools import uuid from sqlalchemy import and_ +from sqlalchemy import Boolean from sqlalchemy.exc import IntegrityError +from sqlalchemy.exc import NoSuchTableError +from sqlalchemy import Integer +from sqlalchemy import MetaData from sqlalchemy import or_ from sqlalchemy.orm import joinedload from sqlalchemy.orm import joinedload_all +from sqlalchemy.schema import Table from sqlalchemy.sql.expression import asc from sqlalchemy.sql.expression import desc +from sqlalchemy.sql.expression import select from sqlalchemy.sql import func +from sqlalchemy import String from nova import block_device from nova.compute import task_states @@ -63,6 +70,7 @@ CONF.import_opt('sql_connection', LOG = logging.getLogger(__name__) +get_engine = db_session.get_engine get_session = db_session.get_session @@ -4786,3 +4794,94 @@ def task_log_end_task(context, task_name, period_beginning, period_ending, if rows == 0: #It's not 
running! raise exception.TaskNotRunning(task_name=task_name, host=host) + + +def _get_default_deleted_value(table): + # TODO(dripton): It would be better to introspect the actual default value + # from the column, but I don't see a way to do that in the low-level APIs + # of SQLAlchemy 0.7. 0.8 has better introspection APIs, which we should + # use when Nova is ready to require 0.8. + deleted_column_type = table.c.deleted.type + if isinstance(deleted_column_type, Integer): + return 0 + elif isinstance(deleted_column_type, Boolean): + return False + elif isinstance(deleted_column_type, String): + return "" + else: + return None + + +@require_admin_context +def archive_deleted_rows_for_table(context, tablename, max_rows=None): + """Move up to max_rows rows from one table to the corresponding + shadow table. + + :returns: number of rows archived + """ + # The context argument is only used for the decorator. + if max_rows is None: + max_rows = 5000 + engine = get_engine() + conn = engine.connect() + metadata = MetaData() + metadata.bind = engine + table = Table(tablename, metadata, autoload=True) + default_deleted_value = _get_default_deleted_value(table) + shadow_tablename = "shadow_" + tablename + rows_archived = 0 + try: + shadow_table = Table(shadow_tablename, metadata, autoload=True) + except NoSuchTableError: + # No corresponding shadow table; skip it. + return rows_archived + # Group the insert and delete in a transaction. + with conn.begin(): + # TODO(dripton): It would be more efficient to insert(select) and then + # delete(same select) without ever returning the selected rows back to + # Python. sqlalchemy does not support that directly, but we have + # nova.db.sqlalchemy.utils.InsertFromSelect for the insert side. We + # need a corresponding function for the delete side. 
+ try: + column = table.c.id + column_name = "id" + except AttributeError: + # We have one table (dns_domains) where the key is called + # "domain" rather than "id" + column = table.c.domain + column_name = "domain" + query = select([table], + table.c.deleted != default_deleted_value).\ + order_by(column).limit(max_rows) + rows = conn.execute(query).fetchall() + if rows: + insert_statement = shadow_table.insert() + conn.execute(insert_statement, rows) + keys = [getattr(row, column_name) for row in rows] + delete_statement = table.delete(column.in_(keys)) + result = conn.execute(delete_statement) + rows_archived = result.rowcount + return rows_archived + + +@require_admin_context +def archive_deleted_rows(context, max_rows=None): + """Move up to max_rows rows from production tables to the corresponding + shadow tables. + + :returns: Number of rows archived. + """ + # The context argument is only used for the decorator. + if max_rows is None: + max_rows = 5000 + tablenames = [] + for model_class in models.__dict__.itervalues(): + if hasattr(model_class, "__tablename__"): + tablenames.append(model_class.__tablename__) + rows_archived = 0 + for tablename in tablenames: + rows_archived += archive_deleted_rows_for_table(context, tablename, + max_rows=max_rows - rows_archived) + if rows_archived >= max_rows: + break + return rows_archived diff --git a/nova/db/sqlalchemy/migrate_repo/versions/154_add_shadow_tables.py b/nova/db/sqlalchemy/migrate_repo/versions/154_add_shadow_tables.py new file mode 100644 index 000000000..7c9f69c2b --- /dev/null +++ b/nova/db/sqlalchemy/migrate_repo/versions/154_add_shadow_tables.py @@ -0,0 +1,77 @@ +# vim: tabstop=4 shiftwidth=4 softtabstop=4 + +# Copyright 2013 Red Hat, Inc. +# Copyright 2013 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from sqlalchemy import BigInteger, Column, MetaData, Table +from sqlalchemy.types import NullType + +from nova.openstack.common import log as logging + +LOG = logging.getLogger(__name__) + + +def upgrade(migrate_engine): + meta = MetaData(migrate_engine) + meta.reflect(migrate_engine) + table_names = meta.tables.keys() + + meta.bind = migrate_engine + + for table_name in table_names: + if table_name.startswith('shadow'): + continue + table = Table(table_name, meta, autoload=True) + + columns = [] + for column in table.columns: + column_copy = None + # NOTE(boris-42): BigInteger is not supported by sqlite, so + # after copy it will have NullType, other + # types that are used in Nova are supported by + # sqlite. 
+ if isinstance(column.type, NullType): + column_copy = Column(column.name, BigInteger(), default=0) + else: + column_copy = column.copy() + columns.append(column_copy) + + shadow_table_name = 'shadow_' + table_name + shadow_table = Table(shadow_table_name, meta, *columns, + mysql_engine='InnoDB') + try: + shadow_table.create() + except Exception: + LOG.info(repr(shadow_table)) + LOG.exception(_('Exception while creating table.')) + raise + + +def downgrade(migrate_engine): + meta = MetaData(migrate_engine) + meta.reflect(migrate_engine) + table_names = meta.tables.keys() + + meta.bind = migrate_engine + + for table_name in table_names: + if table_name.startswith('shadow'): + continue + shadow_table_name = 'shadow_' + table_name + shadow_table = Table(shadow_table_name, meta, autoload=True) + try: + shadow_table.drop() + except Exception: + LOG.error(_("table '%s' not dropped") % shadow_table_name) |
