From 17f9473be7ec32b468d26c7304596ae51901a8e6 Mon Sep 17 00:00:00 2001 From: Rick Harris Date: Wed, 23 May 2012 18:03:08 +0000 Subject: Make validate_compacted_migration into general diff tool. This patch removes some assumptions from the tool and turns it in to a generalized schema diffing tool where any two versions of the DB schema, represented by branch-version pairs, can be diffed. To reflect the more genernal nature, the name was changed to schema_diff.py. Change-Id: I4dc233fa2d4b084779922893f76baa753fe2c126 --- tools/db/schema_diff.py | 270 +++++++++++++++++++++++++++++++ tools/db/validate_compacted_migration.py | 241 --------------------------- 2 files changed, 270 insertions(+), 241 deletions(-) create mode 100755 tools/db/schema_diff.py delete mode 100755 tools/db/validate_compacted_migration.py diff --git a/tools/db/schema_diff.py b/tools/db/schema_diff.py new file mode 100755 index 000000000..7408978e5 --- /dev/null +++ b/tools/db/schema_diff.py @@ -0,0 +1,270 @@ +#!/usr/bin/env python + +# vim: tabstop=4 shiftwidth=4 softtabstop=4 + +# Copyright 2012 OpenStack LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +""" +Utility for diff'ing two versions of the DB schema. + +Each release cycle the plan is to compact all of the migrations from that +release into a single file. This is a manual and, unfortunately, error-prone +process. To ensure that the schema doesn't change, this tool can be used to +diff the compacted DB schema to the original, uncompacted form. + + +The schema versions are specified by providing a git ref (a branch name or +commit hash) and a SQLAlchemy-Migrate version number: +Run like: + + ./tools/db/schema_diff.py mysql master:latest my_branch:82 +""" +import datetime +import glob +import os +import subprocess +import sys + + +### Dump + + +def dump_db(db_driver, db_name, migration_version, dump_filename): + db_driver.create(db_name) + try: + migrate(db_driver, db_name, migration_version) + db_driver.dump(db_name, dump_filename) + finally: + db_driver.drop(db_name) + + +### Diff + + +def diff_files(filename1, filename2): + pipeline = ['diff -U 3 %(filename1)s %(filename2)s' % locals()] + + # Use colordiff if available + if subprocess.call(['which', 'colordiff']) == 0: + pipeline.append('colordiff') + + pipeline.append('less -R') + + cmd = ' | '.join(pipeline) + subprocess.check_call(cmd, shell=True) + + +### Database + + +class MySQL(object): + def create(self, name): + subprocess.check_call(['mysqladmin', '-u', 'root', 'create', name]) + + def drop(self, name): + subprocess.check_call(['mysqladmin', '-f', '-u', 'root', 'drop', name]) + + def dump(self, name, dump_filename): + subprocess.check_call( + 'mysqldump -u root %(name)s > %(dump_filename)s' % locals(), + shell=True) + + def url(self, name): + return 'mysql://root@localhost/%s' % name + + +class Postgres(object): + def create(self, name): + subprocess.check_call(['createdb', name]) + + def drop(self, name): + subprocess.check_call(['dropdb', name]) + + def dump(self, name, dump_filename): + subprocess.check_call( + 'pg_dump %(name)s > %(dump_filename)s' % locals(), + shell=True) + + def url(self, name): + return 'postgres://localhost/%s' % name + + +def _get_db_driver_class(db_type): + if db_type == "mysql": + return MySQL + elif db_type == "postgres": + return Postgres + else: + raise Exception("database %s not supported" % db_type) + + +### Migrate + + +MIGRATE_REPO = os.path.join(os.getcwd(), "nova/db/sqlalchemy/migrate_repo") + + +def migrate(db_driver, db_name, migration_version): + earliest_version = _migrate_get_earliest_version() + + # NOTE(sirp): sqlalchemy-migrate currently cannot handle the skipping of + # migration numbers. + _migrate_cmd( + db_driver, db_name, 'version_control', str(earliest_version - 1)) + + upgrade_cmd = ['upgrade'] + if migration_version != 'latest': + upgrade_cmd.append(str(migration_version)) + + _migrate_cmd(db_driver, db_name, *upgrade_cmd) + + +def _migrate_cmd(db_driver, db_name, *cmd): + manage_py = os.path.join(MIGRATE_REPO, 'manage.py') + + args = ['python', manage_py] + args += cmd + args += ['--repository=%s' % MIGRATE_REPO, + '--url=%s' % db_driver.url(db_name)] + + subprocess.check_call(args) + + +def _migrate_get_earliest_version(): + versions_glob = os.path.join(MIGRATE_REPO, 'versions', '???_*.py') + + versions = [] + for path in glob.iglob(versions_glob): + filename = os.path.basename(path) + prefix = filename.split('_', 1)[0] + try: + version = int(prefix) + except ValueError: + pass + versions.append(version) + + versions.sort() + return versions[0] + + +### Git + + +def git_current_branch_name(): + ref_name = git_symbolic_ref('HEAD', quiet=True) + current_branch_name = ref_name.replace('refs/heads/', '') + return current_branch_name + + +def git_symbolic_ref(ref, quiet=False): + args = ['git', 'symbolic-ref', ref] + if quiet: + args.append('-q') + proc = subprocess.Popen(args, stdout=subprocess.PIPE) + stdout, stderr = proc.communicate() + return stdout.strip() + + +def git_checkout(branch_name): + subprocess.check_call(['git', 'checkout', branch_name]) + + +def git_has_uncommited_changes(): + return subprocess.call(['git', 'diff', '--quiet', '--exit-code']) == 1 + + +### Command + + +def die(msg): + print >> sys.stderr, "ERROR: %s" % msg + sys.exit(1) + + +def usage(msg=None): + if msg: + print >> sys.stderr, "ERROR: %s" % msg + + prog = "schema_diff.py" + args = ["", "", + ""] + + print >> sys.stderr, "usage: %s %s" % (prog, ' '.join(args)) + sys.exit(1) + + +def parse_options(): + try: + db_type = sys.argv[1] + except IndexError: + usage("must specify DB type") + + try: + orig_branch, orig_version = sys.argv[2].split(':') + except IndexError: + usage('original branch and version required (e.g. master:82)') + + try: + new_branch, new_version = sys.argv[3].split(':') + except IndexError: + usage('new branch and version required (e.g. master:82)') + + return db_type, orig_branch, orig_version, new_branch, new_version + + +def main(): + timestamp = datetime.datetime.utcnow().strftime("%Y%m%d_%H%M%S") + + ORIG_DB = 'orig_db_%s' % timestamp + NEW_DB = 'new_db_%s' % timestamp + + ORIG_DUMP = ORIG_DB + ".dump" + NEW_DUMP = NEW_DB + ".dump" + + db_type, orig_branch, orig_version, new_branch, new_version =\ + parse_options() + + # Since we're going to be switching branches, ensure user doesn't have any + # uncommited changes + if git_has_uncommited_changes(): + die("You have uncommited changes. Please commit them before running " + "this command.") + + db_driver = _get_db_driver_class(db_type)() + + users_branch = git_current_branch_name() + git_checkout(orig_branch) + + try: + # Dump Original Schema + dump_db(db_driver, ORIG_DB, orig_version, ORIG_DUMP) + + # Dump New Schema + git_checkout(new_branch) + dump_db(db_driver, NEW_DB, new_version, NEW_DUMP) + + diff_files(ORIG_DUMP, NEW_DUMP) + finally: + git_checkout(users_branch) + + if os.path.exists(ORIG_DUMP): + os.unlink(ORIG_DUMP) + + if os.path.exists(NEW_DUMP): + os.unlink(NEW_DUMP) + + +if __name__ == "__main__": + main() diff --git a/tools/db/validate_compacted_migration.py b/tools/db/validate_compacted_migration.py deleted file mode 100755 index bf15e8a5b..000000000 --- a/tools/db/validate_compacted_migration.py +++ /dev/null @@ -1,241 +0,0 @@ -#!/usr/bin/env python - -# vim: tabstop=4 shiftwidth=4 softtabstop=4 - -# Copyright 2012 OpenStack LLC. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -""" -Utility for ensuring DB schemas don't change when compacting migrations. - -Each release cycle the plan is to compact all of the migrations from that -release into a single file. This is a manual and, unfortunately, error-prone -process. To ensure that the schema doesn't change, this tool can be used to -diff the compacted DB schema to the original, uncompacted form. - -Notes: - -This utility assumes you start off in the branch containing the compacted -migration. - -Run like: - - ./tools/db/validate_compacted_migration.py mysql 82 master -""" -import os -import subprocess -import sys - -### Dump - - -def dump_db(db_driver, db_name, compacted_version, dump_filename, - latest=False): - db_driver.create(db_name) - try: - migrate(db_driver, db_name, compacted_version, latest=latest) - db_driver.dump(db_name, dump_filename) - finally: - db_driver.drop(db_name) - - -### Diff - - -def diff_files(filename1, filename2): - pipeline = ['diff -U 3 %(filename1)s %(filename2)s' % locals()] - - # Use colordiff if available - if subprocess.call(['which', 'colordiff']) == 0: - pipeline.append('colordiff') - - pipeline.append('less -R') - - cmd = ' | '.join(pipeline) - subprocess.check_call(cmd, shell=True) - - -### Database - - -class MySQL(object): - def create(self, name): - subprocess.check_call(['mysqladmin', '-u', 'root', 'create', name]) - - def drop(self, name): - subprocess.check_call(['mysqladmin', '-f', '-u', 'root', 'drop', name]) - - def dump(self, name, dump_filename): - subprocess.check_call( - 'mysqldump -u root %(name)s > %(dump_filename)s' % locals(), - shell=True) - - def url(self, name): - return 'mysql://root@localhost/%s' % name - - -class Postgres(object): - def create(self, name): - subprocess.check_call(['createdb', name]) - - def drop(self, name): - subprocess.check_call(['dropdb', name]) - - def dump(self, name, dump_filename): - subprocess.check_call( - 'pg_dump %(name)s > %(dump_filename)s' % locals(), - shell=True) - - def url(self, name): - return 'postgres://localhost/%s' % name - - -def _get_db_driver_class(db_type): - if db_type == "mysql": - return MySQL - elif db_type == "postgres": - return Postgres - else: - raise Exception("database %s not supported" % db_type) - - -### Migrate - - -def migrate(db_driver, db_name, compacted_version, latest=False): - # NOTE(sirp): sqlalchemy-migrate currently cannot handle the skipping of - # migration numbers - _migrate_cmd( - db_driver, db_name, 'version_control', str(compacted_version - 1)) - - upgrade_cmd = ['upgrade'] - if not latest: - upgrade_cmd.append(str(compacted_version)) - - _migrate_cmd(db_driver, db_name, *upgrade_cmd) - - -def _migrate_cmd(db_driver, db_name, *cmd): - MIGRATE_REPO = os.path.join(os.getcwd(), "nova/db/sqlalchemy/migrate_repo") - manage_py = os.path.join(MIGRATE_REPO, 'manage.py') - - args = ['python', manage_py] - args += cmd - args += ['--repository=%s' % MIGRATE_REPO, - '--url=%s' % db_driver.url(db_name)] - - subprocess.check_call(args) - - -### Git - - -def git_current_branch_name(): - ref_name = git_symbolic_ref('HEAD', quiet=True) - current_branch_name = ref_name.replace('refs/heads/', '') - return current_branch_name - - -def git_symbolic_ref(ref, quiet=False): - args = ['git', 'symbolic-ref', ref] - if quiet: - args.append('-q') - proc = subprocess.Popen(args, stdout=subprocess.PIPE) - stdout, stderr = proc.communicate() - return stdout.strip() - - -def git_checkout(branch_name): - subprocess.check_call(['git', 'checkout', branch_name]) - - -def git_has_uncommited_changes(): - return subprocess.call(['git', 'diff', '--quiet', '--exit-code']) == 1 - - -### Command - - -def die(msg): - print >> sys.stderr, "ERROR: %s" % msg - sys.exit(1) - - -def usage(msg=None): - if msg: - print >> sys.stderr, "ERROR: %s" % msg - - prog = "validate_compacted_migration.py" - args = ["", "", - ""] - - print >> sys.stderr, "usage: %s %s" % (prog, ' '.join(args)) - sys.exit(1) - - -def parse_options(): - try: - db_type = sys.argv[1] - except IndexError: - usage("must specify DB type") - - try: - compacted_version = int(sys.argv[2]) - except IndexError: - usage('must specify compacted migration version') - except ValueError: - usage('compacted version must be a number') - - try: - uncompacted_branch_name = sys.argv[3] - except IndexError: - usage('must specify uncompacted branch name') - - return db_type, compacted_version, uncompacted_branch_name - - -def main(): - COMPACTED_DB = 'compacted' - UNCOMPACTED_DB = 'uncompacted' - - COMPACTED_FILENAME = COMPACTED_DB + ".dump" - UNCOMPACTED_FILENAME = UNCOMPACTED_DB + ".dump" - - db_type, compacted_version, uncompacted_branch_name = parse_options() - - # Since we're going to be switching branches, ensure user doesn't have any - # uncommited changes - if git_has_uncommited_changes(): - die("You have uncommited changes. Please commit them before running " - "this command.") - - db_driver = _get_db_driver_class(db_type)() - - # Dump Compacted - dump_db(db_driver, COMPACTED_DB, compacted_version, COMPACTED_FILENAME) - - # Dump Uncompacted - original_branch_name = git_current_branch_name() - git_checkout(uncompacted_branch_name) - try: - dump_db(db_driver, UNCOMPACTED_DB, compacted_version, - UNCOMPACTED_FILENAME, latest=True) - finally: - git_checkout(original_branch_name) - - diff_files(UNCOMPACTED_FILENAME, COMPACTED_FILENAME) - - -if __name__ == "__main__": - main() -- cgit