summaryrefslogtreecommitdiffstats
path: root/roles/distgit
diff options
context:
space:
mode:
author: Matt Prahl <mprahl@redhat.com>, 2017-08-18 18:40:25 +0000
committer: Ralph Bean <rbean@redhat.com>, 2017-08-18 18:42:42 +0000
commit: 27a90039feae777e705c25c3dc980503d06aa84a (patch)
tree: bc4550cc4f2210e777e7e20f2a080429b977755e /roles/distgit
parent: 11ace40beb77efa884ad799804ceda6adf28c053 (diff)
download: ansible-27a90039feae777e705c25c3dc980503d06aa84a.tar.gz
ansible-27a90039feae777e705c25c3dc980503d06aa84a.tar.xz
ansible-27a90039feae777e705c25c3dc980503d06aa84a.zip
Use multithread on Pagure and PDC queries
Diffstat (limited to 'roles/distgit')
-rw-r--r-- roles/distgit/pagure/templates/pagure-sync-bugzilla.py.j2 | 278
1 file changed, 200 insertions, 78 deletions
diff --git a/roles/distgit/pagure/templates/pagure-sync-bugzilla.py.j2 b/roles/distgit/pagure/templates/pagure-sync-bugzilla.py.j2
index db9f5bc04..6d83cf583 100644
--- a/roles/distgit/pagure/templates/pagure-sync-bugzilla.py.j2
+++ b/roles/distgit/pagure/templates/pagure-sync-bugzilla.py.j2
@@ -27,7 +27,7 @@ sync information from the Pagure into bugzilla
This short script takes information about package ownership and imports it
into bugzilla.
'''
-
+from __future__ import print_function
import re
import argparse
import datetime
@@ -39,6 +39,9 @@ import json
import xmlrpclib
import codecs
import smtplib
+import multiprocessing.pool
+from math import ceil
+from functools import partial
try:
from email.Message import Message
except ImportError:
@@ -483,8 +486,10 @@ def _get_watchers_rv_json(pagure_project):
print('Querying {0}'.format(watchers_api_url))
watchers_rv = session.get(watchers_api_url, timeout=60)
if not watchers_rv.ok:
- error_msg = base_error_msg.format(
- watchers_api_url, watchers_rv.status_code, watchers_rv.text)
+ error_msg = ('The connection to "{0}" failed with the status code {1} '
+ 'and output "{2}"'.format(
+ watchers_api_url, watchers_rv.status_code,
+ watchers_rv.text))
raise RuntimeError(error_msg)
return watchers_rv.json()
@@ -507,8 +512,10 @@ def _is_retired_in_pdc(product, project):
raise RuntimeError("Could not find %r in PDC." % project)
branches = pdc_rv.json()['results']
if not branches:
- raise RuntimeError("No results for %r in PDC." % project)
- return branches[0]['active']
+ print("No results for %r in PDC." % project, file=sys.stderr)
+ # Assume it's not retired if we can't find out for sure
+ return False
+ return not branches[0]['active']
@cache.cache_on_arguments()
@@ -526,18 +533,100 @@ def _get_override_yaml(project):
return {}
-def pagure_project_to_acl_schema(pagure_project, product):
+@cache.cache_on_arguments()
+def _get_package_summary_from_mdapi(namespace, repo, session=None):
+    """
+    Looks up the summary of a source package in mdapi's rawhide data.
+    :param namespace: string of the namespace the project is in; anything
+        other than "rpms" yields None without querying mdapi
+    :param repo: string of the project name, used as the source package name
+        in the mdapi URL
+    :param session: session object used for the HTTP GET; when None, a new
+        one is obtained from retry_session()
+    :return: the value of the "summary" key of the mdapi JSON response, or
+        None when the namespace is not "rpms" or mdapi answers with a 404
+    :raises RuntimeError: when mdapi answers with a non-OK status other than
+        404
+    """
+    if namespace != 'rpms':
+        return None
+
+    http = retry_session() if session is None else session
+    url = '{0}/rawhide/srcpkg/{1}'.format(MDAPIURL.rstrip('/'), repo)
+    if DRY_RUN:
+        print('Querying {0}'.format(url))
+
+    rv = http.get(url, timeout=60)
+    if rv.ok:
+        return rv.json()['summary']
+    if rv.status_code == 404:
+        # A 404 is treated as "no summary available" rather than an error
+        return None
+
+    raise RuntimeError(
+        ('The connection to "{0}" failed with the status code {1} '
+         'and output "{2}"').format(url, rv.status_code, rv.text))
+
+
+def _get_pdc_project_name_and_branches(session, namespace, repo):
+    """
+    Gets the branches on a project. This function is used for mapping.
+    :param session: session object used to perform the HTTP GET
+    :param namespace: string of the namespace the project is in; it is used
+        as the key into PDC_TYPES to select the branch type to filter on
+    :param repo: string of the project
+    :return: a tuple with the repo name and a list of the repo's branches;
+        the list is empty when the request does not succeed
+    """
+    branches_url = '{0}component-branches/'.format(PDCURL)
+    params = dict(
+        global_component=repo,
+        type=PDC_TYPES[namespace]
+    )
+    if DRY_RUN:
+        print('Querying {0} {1}'.format(branches_url, params))
+    # The filters have to be sent with the request; without them the query is
+    # not restricted to this repo and the returned branches are meaningless
+    rv = session.get(branches_url, params=params, timeout=60)
+
+    # If the project's branches can't be reported, just return no branches and
+    # it will be skipped later on
+    if not rv.ok:
+        print(('The connection to "{0}" failed with the status code {1} and '
+               'output "{2}"'.format(branches_url, rv.status_code, rv.text)),
+              file=sys.stderr)
+        return repo, []
+
+    data = rv.json()
+    return repo, [branch['name'] for branch in data['results']]
+
+
+def _get_pagure_projects_from_page(session, namespace, page):
+    """
+    Fetches one page (100 entries per page) of non-fork Pagure projects in a
+    namespace. This function is to be used for mapping, one call per page.
+    :param session: session object used to perform the HTTP GET
+    :param namespace: string of the namespace to query for projects
+    :param page: int of the page to query at
+    :return: the value of the "projects" key of the JSON response for the page
+    :raises RuntimeError: when the response evaluates as false
+    """
+    base_url = PAGURE_DIST_GIT_URL.rstrip('/')
+    url = ('{0}/api/0/projects?namespace={1}&page={2}&per_page=100&'
+           'fork=false'.format(base_url, namespace, page))
+
+    if DRY_RUN:
+        print('- Querying {0}'.format(url))
+
+    response = session.get(url, timeout=120)
+    if response:
+        return response.json()['projects']
+
+    # Report the failure on stderr, then abort with the same details
+    requested_url = response.request.url
+    print("Failed to talk to %r %r." % (requested_url, response),
+          file=sys.stderr)
+    raise RuntimeError('Failed to talk to {0} {1}.'.format(
+        requested_url, response))
+
+
+def _pagure_project_to_acl_schema(project_and_product, session=None):
"""
This function translates the JSON of a Pagure project to what PkgDB used to
- output in the Bugzilla API.
- :param pagure_project: a dictionary of the JSON of a Pagure project
- :return: a dictionary of the content that the Bugzilla API would output
+ output in the Bugzilla API. This function is used for mapping.
+ :param project_and_product: a tuple containing the dictionary of the JSON
+ of a Pagure project and a string of the product (e.g. "Fedora",
+ "Fedora EPEL")
+ :param session: a requests session object or None
+ :return: a dictionary of the content that the PkgDB Bugzilla API would
+ return
"""
- session = retry_session()
- base_error_msg = ('The connection to "{0}" failed with the status code '
- '{1} and output "{2}"')
+ project, product = project_and_product
+ if session is None:
+ session = retry_session()
- watchers_rv_json = _get_watchers_rv_json(pagure_project)
+ watchers_rv_json = _get_watchers_rv_json(project)
user_cc_list = []
for user, watch_levels in watchers_rv_json['watchers'].items():
@@ -545,28 +634,15 @@ def pagure_project_to_acl_schema(pagure_project, product):
if 'issues' in watch_levels:
user_cc_list.append(user)
- summary = None
- if pagure_project['namespace'] == 'rpms':
- mdapi_url = '{0}/rawhide/srcpkg/{1}'.format(
- MDAPIURL.rstrip('/'), pagure_project['name'])
- if DRY_RUN:
- print('Querying {0}'.format(mdapi_url))
- mdapi_rv = session.get(mdapi_url, timeout=60)
- if mdapi_rv.ok:
- mdapi_rv_json = mdapi_rv.json()
- summary = mdapi_rv_json['summary']
- elif not mdapi_rv.ok and mdapi_rv.status_code != 404:
- error_msg = base_error_msg.format(
- mdapi_url, mdapi_rv.status_code, mdapi_rv.text)
- raise RuntimeError(error_msg)
-
- # Check if the branch is retired in PDC, and if so set assignee to orphan.
- owner = pagure_project['access_users']['owner'][0]
+ summary = _get_package_summary_from_mdapi(
+ project['namespace'], project['name'], session)
+
+ # Check if the project is retired in PDC, and if so set assignee to orphan.
+ owner = project['access_users']['owner'][0]
if _is_retired_in_pdc(product, project):
owner = 'extras-orphan@fedoraproject.org'
# Check if the Bugzilla ticket assignee has been manually overridden
- owner = pagure_project['access_users']['owner'][0]
override_yaml = _get_override_yaml(project)
if override_yaml.get(product) \
and isinstance(override_yaml[product], string_types):
@@ -583,7 +659,11 @@ def pagure_project_to_acl_schema(pagure_project, product):
# No package has this set in PkgDB's API, so it can be safely turned
# off and set to the defaults later on in the code
'qacontact': None,
- 'summary': summary
+ 'summary': summary,
+ # These two values are not part of original PkgDB RV, but they are
+ # useful
+ 'product': product,
+ 'project': project['name']
}
@@ -610,57 +690,99 @@ if __name__ == '__main__':
'Fedora Container': {},
'Fedora EPEL': {},
}
- pagure_rpms_api_url = ('{0}/api/0/projects?fork=false&namespace=rpms&page=1&'
- 'per_page=100'.format(
- PAGURE_DIST_GIT_URL.rstrip('/')))
- session = retry_session()
- while True:
+ session = retry_session()
+ pagure_namespace_to_project_lists = {}
+ pool = multiprocessing.pool.ThreadPool(8)
+
+ # Query for all the rpm and container projects and store them, one list
+ # per page, in pagure_namespace_to_project_lists
+ for namespace in ['rpms', 'container']:
+ first_page_url = ('{0}/api/0/projects?namespace={1}&fork=false&page=1'
+ '&per_page=1'.format(PAGURE_DIST_GIT_URL, namespace))
if DRY_RUN:
- print('Querying {0}'.format(pagure_rpms_api_url))
- rv_json = session.get(pagure_rpms_api_url, timeout=120).json()
- for project in rv_json['projects']:
- pagure_project_branches_api_url = (
- '{0}/api/0/rpms/{1}/git/branches'
- .format(PAGURE_DIST_GIT_URL.rstrip('/'), project['name']))
- branch_rv_json = session.get(
- pagure_project_branches_api_url, timeout=60).json()
- epel = False
- fedora = False
- for branch in branch_rv_json['branches']:
- if re.match(r'epel\d+', branch):
- epel = True
- projects_dict['Fedora EPEL'][project['name']] = \
- pagure_project_to_acl_schema(project, 'Fedora EPEL')
- else:
- fedora = True
- projects_dict['Fedora'][project['name']] = \
- pagure_project_to_acl_schema(project, 'Fedora')
+ print('- Querying {0}'.format(first_page_url))
+ first_page_rv = session.get(first_page_url, timeout=120)
+
+ if not bool(first_page_rv):
+ raise RuntimeError('Failed to talk to {0} {1}.'.format(
+ first_page_rv.request.url, first_page_rv))
+
+ total_projects = first_page_rv.json()['total_projects']
+ num_pages = int(ceil(total_projects / 100.0))
+
+ # Since we are going to multi-thread, we need to make a partial
+ # function call so that all the function needs is an iterable to run
+ p_get_pagure_projects_from_page = partial(
+ _get_pagure_projects_from_page, session, namespace)
+ pagure_namespace_to_project_lists[namespace] = pool.map(
+ p_get_pagure_projects_from_page, range(1, num_pages + 1))
+
+ # Flatten the list of lists (each page is a list of projects)
+ pagure_namespace_to_projects = {}
+ for namespace in ['rpms', 'container']:
+ pagure_namespace_to_projects[namespace] = []
+ for project_list in pagure_namespace_to_project_lists[namespace]:
+ pagure_namespace_to_projects[namespace] += project_list
+ # This is no longer needed, so we can save some RAM
+ del pagure_namespace_to_project_lists
+
+ # Now, we must get all the branches for the RPM projects we just queried.
+ # This will be stored in pagure_rpm_project_branches as a dictionary of
+ # {'python-requests': ['master', 'f27', 'f26']}
+ pagure_rpm_project_names = [project['name'] for project in
+ pagure_namespace_to_projects['rpms']]
+ p_get_pdc_project_name_and_branches = partial(
+ _get_pdc_project_name_and_branches, session, 'rpms')
+ pagure_rpm_project_branches = dict(pool.map(
+ p_get_pdc_project_name_and_branches, pagure_rpm_project_names))
+ # This is no longer needed, so we can save some RAM
+ del pagure_rpm_project_names
+
+ # Determine what products each project maps to based on its branches.
+ # pagure_rpms_project_products will be in the format of
+ # [(<Pagure project dict>, 'Fedora'), ...] which will be used by a mapping
+ # function below
+ pagure_rpms_project_products = []
+ for project in pagure_namespace_to_projects['rpms']:
+ name = project['name']
+ products = []
+ branches = pagure_rpm_project_branches[name]
+ for branch in branches:
+ if re.match(r'^epel\d+$', branch):
+ epel = True
+ products.append('Fedora EPEL')
+ else:
+ fedora = True
+ products.append('Fedora')
- if fedora and epel:
- break
+ if 'Fedora' in products and 'Fedora EPEL' in products:
+ break
- if rv_json['pagination']['next']:
- pagure_rpms_api_url = rv_json['pagination']['next']
- else:
- break
+ for product in products:
+ pagure_rpms_project_products.append((project, product))
- pagure_container_api_url = (
- '{0}/api/0/projects?fork=false&namespace=container&page=1&per_page=100'
- .format(PAGURE_DIST_GIT_URL))
- while True:
- if DRY_RUN:
- print('Querying {0}'.format(pagure_container_api_url))
- rv_json = session.get(pagure_container_api_url, timeout=120).json()
- for project in rv_json['projects']:
- project_pkgdb_schema = pagure_project_to_acl_schema(project)
- projects_dict['Fedora Container'][project['name']] = \
- project_pkgdb_schema
-
- if rv_json['pagination']['next']:
- pagure_container_api_url = rv_json['pagination']['next']
- else:
- break
+ for project in pagure_namespace_to_projects['container']:
+ pagure_rpms_project_products.append((project, 'Fedora Container'))
+
+ # Save some RAM since this large dict is no longer needed
+ del pagure_namespace_to_projects
+
+ # Now, we must transform the data we collected into something that PkgDB
+ # would have returned
+ p_pagure_project_to_acl_schema = partial(
+ _pagure_project_to_acl_schema, session=session)
+ project_to_acl_schemas = pool.map(
+ p_pagure_project_to_acl_schema, pagure_rpms_project_products)
+ pool.close()
+
+ # Transform the data returned in project_to_acl_schemas to be an orderly
+ # dictionary for ease of use later on.
+ for rv in project_to_acl_schemas:
+ projects_dict[rv['product']][rv['project']] = rv
+
+ # This is no longer needed, so we can save some RAM
+ del project_to_acl_schemas
# Initialize the connection to bugzilla
bugzilla = Bugzilla(BZSERVER, BZUSER, BZPASS, projects_dict)