summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Pierre-Yves Chibon <pingou@pingoured.fr> 2014-12-19 11:56:50 +0100
committer Pierre-Yves Chibon <pingou@pingoured.fr> 2014-12-19 11:56:59 +0100
commit e6e33204af55ffa1f93b2c36662b99d73bbe75c4 (patch)
tree 69ece3e6c48b04a95e865f10316dbf751dff6e43
parent f6972b74762ae45edf14cca11c035c253dbdbd5c (diff)
download ansible-e6e33204af55ffa1f93b2c36662b99d73bbe75c4.tar.gz
ansible-e6e33204af55ffa1f93b2c36662b99d73bbe75c4.tar.xz
ansible-e6e33204af55ffa1f93b2c36662b99d73bbe75c4.zip
Add the repo2json role
-rw-r--r-- roles/repo2json/files/repo2json.cron 1
-rw-r--r-- roles/repo2json/files/rhel_to_json.py 239
-rw-r--r-- roles/repo2json/tasks/main.yml 32
3 files changed, 272 insertions, 0 deletions
diff --git a/roles/repo2json/files/repo2json.cron b/roles/repo2json/files/repo2json.cron
new file mode 100644
index 000000000..c18ba9a0d
--- /dev/null
+++ b/roles/repo2json/files/repo2json.cron
@@ -0,0 +1 @@
+45 * * * * root cd /srv/web/repojson && /usr/local/bin/repo2json
diff --git a/roles/repo2json/files/rhel_to_json.py b/roles/repo2json/files/rhel_to_json.py
new file mode 100644
index 000000000..340a643a7
--- /dev/null
+++ b/roles/repo2json/files/rhel_to_json.py
@@ -0,0 +1,239 @@
+#!/usr/bin/env python2
+
+"""
+This script extracts the content of the primary.sqlite databases used by
+RHEL and generates a big JSON out of it so that we can easily check which
+packages already are in RHEL and on which arch.
+
+requires:
+ sqlalchemy
+ lzma (only if there are .xz compressed primary.sqlite db)
+
+"""
+
+# These two lines are needed to run on EL6
+__requires__ = ['SQLAlchemy >= 0.7']
+import pkg_resources
+
+
+import contextlib
+import json
+import os
+import shutil
+import sys
+import tempfile
+
+
+# Database related part
+
+from sqlalchemy import Column, ForeignKey, Integer, Text, create_engine
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import sessionmaker
+
+
+BASE = declarative_base()
+
+
+class Package(BASE):
+    ''' Maps the packages table in the primary.sqlite database from
+    repodata to a python object.
+    '''
+    __tablename__ = 'packages'
+    pkgKey = Column(Integer, primary_key=True)
+    name = Column(Text)
+    rpm_sourcerpm = Column(Text)
+    version = Column(Text)
+    epoch = Column(Text)
+    release = Column(Text)
+    arch = Column(Text)
+
+    @property
+    def basename(self):
+        ''' Return the base package name using the rpm_sourcerpm info.
+
+        The last two dash-separated fields of rpm_sourcerpm are dropped
+        (it is expected to look like ``name-version-release.src.rpm``).
+        When the row has no source rpm recorded (NULL or empty string) the
+        package's own name is returned instead of raising an
+        AttributeError.
+        '''
+        if not self.rpm_sourcerpm:
+            return self.name
+        return self.rpm_sourcerpm.rsplit('-', 2)[0]
+
+
+# Here below we tell the script where to look for the repodata, we could
+# point it to the top level, but then we would miss the differences between
+# el5, 6 and 7.
+# I tried to create some rhel5 and rhel6 folders in which I sym-linked the
+# respective el5/6 folder from the level above. The problem was that
+# os.walk() doesn't follow symlinks by default, so it would not find any
+# repodata.
+
+# Maps a release label to the list of folders searched (recursively) for
+# primary.sqlite files. main() walks every key and writes one
+# pkg_<label>.json file per key.
+PATHS = {
+ 'el7': [
+ '/mnt/fedora/app/fi-repo/rhel/rhel7/',
+ ],
+ 'el6': [
+ '/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-6',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-fastrack-6',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-ha-6',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-ha-fastrack-6',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-lb-6',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-lb-fastrack-6',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-optional-6',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-optional-fastrack-6',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-rs-6',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-ppc64-server-6',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-ppc64-server-fastrack-6',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-rs-fastrack-6',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-ppc64-server-ha-6',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-ppc64-server-lb-6',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-ppc64-server-optional-6',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-ppc64-server-optional-fastrack-6',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-6',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-6-ost-preview',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-6-rhevh',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-6-rhevm-3',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-6-rhs-rhsc-2.0',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-fastrack-6',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-ha-fastrack-6',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-ha-6',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-lb-6',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-optional-6',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-optional-fastrack-6',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-lb-fastrack-6',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-rhsclient-6',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-rs-6',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-rs-fastrack-6',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-sfs-6',
+ ],
+ 'el5': [
+ '/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-5/',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-5-mrg-grid-1/',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-5-mrg-grid-execute-1/',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-5-mrg-management-1/',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-5-mrg-messaging-1/',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-5-mrg-messaging-base-1/',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-5-mrg-realtime-1/',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-cluster-5/',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-cluster-storage-5/',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-fastrack-5/',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-productivity-5/',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-vt-5/',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-ppc-server-5/',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-ppc-server-5-mrg-messaging-1/',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-ppc-server-5-mrg-messaging-base-1/',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-ppc-server-cluster-5/',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-ppc-server-cluster-storage-5/',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-ppc-server-fastrack-5/',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-ppc-server-productivity-5/',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-ppc-server-vt-5/',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-5/',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-5-mrg-grid-1/',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-5-mrg-grid-execute-1/',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-5-mrg-management-1/',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-5-mrg-messaging-1/',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-5-mrg-messaging-base-1/',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-5-mrg-realtime-1/',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-fastrack-5/',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-productivity-5/',
+ '/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-vt-5/',
+ ],
+}
+
+
+def find_primary_sqlite(paths, followlinks=False):
+    ''' Find all the primary.sqlite files located at or under the given
+    path(s).
+
+    :arg paths: a single folder or a list/tuple of folders to search
+        recursively. Entries which are not existing directories are
+        silently skipped.
+    :kwarg followlinks: handed over to os.walk(); set it to True to also
+        descend into sym-linked folders. Defaults to False, which keeps
+        the historical behavior.
+    :return: the list of the full path of every file whose name contains
+        ``primary.sqlite`` (compressed variants such as
+        ``xyz-primary.sqlite.bz2`` therefore match as well).
+    '''
+    if not isinstance(paths, (list, tuple)):
+        paths = [paths]
+    files = []
+    for path in paths:
+        if not os.path.isdir(path):
+            continue
+        for dirpath, _dirnames, filenames in os.walk(
+                path, followlinks=followlinks):
+            files.extend(
+                os.path.join(dirpath, filename)
+                for filename in filenames
+                if 'primary.sqlite' in filename
+            )
+    return files
+
+
+def decompress_primary_db(archive, location):
+    ''' Decompress the given primary.sqlite archive at the specified
+    location.
+
+    The format is picked from the file extension: ``.xz``, ``.gz`` and
+    ``.bz2`` archives are decompressed, a plain ``.sqlite`` file is copied
+    as is. Any other extension is ignored and nothing is written.
+
+    :arg archive: path to the (possibly compressed) sqlite database.
+    :arg location: path of the file in which the raw database is written,
+        an existing file is overwritten.
+    '''
+    if archive.endswith('.xz'):
+        # Imported here so that lzma is only required when it is needed.
+        import lzma
+        source = lzma.LZMAFile(archive)
+    elif archive.endswith('.gz'):
+        # primary.sqlite.gz is a gzip-compressed file, not a tar archive.
+        import gzip
+        source = gzip.open(archive, 'rb')
+    elif archive.endswith('.bz2'):
+        import bz2
+        source = bz2.BZ2File(archive)
+    elif archive.endswith('.sqlite'):
+        source = open(archive, 'rb')
+    else:
+        return
+
+    # contextlib.closing() is used since not all of these file objects
+    # support the ``with`` statement on python 2.6. The content is binary,
+    # hence the 'rb'/'wb' modes, and it is copied by chunks rather than
+    # loaded in memory all at once.
+    with contextlib.closing(source) as inp:
+        with open(location, 'wb') as out:
+            shutil.copyfileobj(inp, out)
+
+def get_pkg_info(session, pkg_name):
+    ''' Return the Package row of the sqlite database whose name is the
+    one specified.
+
+    The lookup ends with Query.one(), so exactly one matching row is
+    expected.
+    '''
+    by_name = Package.name == pkg_name
+    return session.query(Package).filter(by_name).one()
+
+
+def _merge_packages(session, output):
+    ''' Merge the packages of the database behind ``session`` into
+    ``output``.
+
+    ``output`` maps a base package name to a dict with the keys ``arch``
+    (a list), ``epoch``, ``version`` and ``release``; it is updated in
+    place. The epoch/version/release kept are the ones of the first
+    package seen for a given base name.
+
+    :return: the tuple (number of packages read, number of base names
+        which were not yet in ``output``).
+    '''
+    cnt = 0
+    new = 0
+    for pkg in session.query(Package).all():
+        basename = pkg.basename
+        if basename in output:
+            if pkg.arch not in output[basename]['arch']:
+                output[basename]['arch'].append(pkg.arch)
+            # TODO: checks if the evr is more recent or not
+            # (and update if it is)
+        else:
+            new += 1
+            output[basename] = {
+                'arch': [pkg.arch],
+                'epoch': pkg.epoch,
+                'version': pkg.version,
+                'release': pkg.release,
+            }
+        cnt += 1
+    return cnt, new
+
+
+def main():
+    ''' Main function, does the job :)
+
+    For every release listed in PATHS, decompress each primary.sqlite
+    database found into a temporary folder, merge its packages and write
+    the result in ``pkg_<release>.json`` in the current working
+    directory. The temporary folder is removed at the end, including when
+    an error occurs.
+    '''
+    working_dir = tempfile.mkdtemp(prefix='rhel2json-')
+    # Single-argument print() calls output the same text as the print
+    # statement on python 2.
+    print('working dir: %s' % working_dir)
+
+    try:
+        for el in PATHS:
+            output = {}
+            dbfile = os.path.join(working_dir, 'primary_db_%s.sqlite' % el)
+
+            for dbfile_xz in find_primary_sqlite(PATHS[el]):
+                # The folder shown is the grand-parent of the database
+                # file.
+                cur_fold = os.path.dirname(os.path.dirname(dbfile_xz))
+                print('- %s' % cur_fold)
+
+                # dbfile is re-used for every database of a release: drop
+                # the previous one so that a database which could not be
+                # decompressed is skipped below rather than having the
+                # previous content counted twice.
+                if os.path.isfile(dbfile):
+                    os.remove(dbfile)
+                decompress_primary_db(dbfile_xz, dbfile)
+
+                if not os.path.isfile(dbfile):
+                    print('%s was incorrectly decompressed -- ignoring'
+                          % dbfile)
+                    continue
+
+                db_url = 'sqlite:///%s' % dbfile
+                db_session = sessionmaker(bind=create_engine(db_url))
+                session = db_session()
+                try:
+                    cnt, new = _merge_packages(session, output)
+                finally:
+                    # Release the connection before the file is replaced.
+                    session.close()
+                print('%s packages in %s' % (cnt, cur_fold))
+                print('%s packages were new packages' % (new))
+
+            print('\n%s packages retrieved in %s' % (len(output), el))
+            outputfile = 'pkg_%s.json' % el
+            with open(outputfile, 'w') as stream:
+                stream.write(json.dumps(output))
+            print('Output File: %s\n' % outputfile)
+    finally:
+        # Drop the temp directory
+        shutil.rmtree(working_dir)
+
+if __name__ == '__main__':
+ main()
diff --git a/roles/repo2json/tasks/main.yml b/roles/repo2json/tasks/main.yml
new file mode 100644
index 000000000..0e2ddcd53
--- /dev/null
+++ b/roles/repo2json/tasks/main.yml
@@ -0,0 +1,32 @@
+---
+# Configuration for the repo2json role: installs the rhel_to_json script,
+# its dependencies and the cron job running it.
+
+- name: clean yum metadata
+  command: yum clean all
+  tags:
+  - packages
+  - repo2json
+
+- name: Install necessary packages
+  yum: pkg={{ item }} state=present
+  with_items:
+  - python-sqlalchemy0.7
+  - pyliblzma
+  tags:
+  - packages
+  - repo2json
+
+- name: Ensure that the output dir exists
+  file: dest=/srv/web/repojson owner=root group=root mode=0755 state=directory
+  tags:
+  - repo2json
+
+# The script is installed under the path invoked by repo2json.cron
+# (/usr/local/bin/repo2json). The modes are quoted so that they are kept
+# as octal strings.
+- name: Install the rhel_to_json script and cron
+  copy: src={{ item.file }} dest={{ item.dest }}
+        owner=root group=root mode={{ item.mode }}
+  with_items:
+  - { file: rhel_to_json.py, dest: /usr/local/bin/repo2json, mode: "0755" }
+  - { file: repo2json.cron, dest: /etc/cron.d/repo2json.cron, mode: "0644" }
+  tags:
+  - cron
+  - repo2json