summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJames Laska <jlaska@redhat.com>2011-06-17 10:52:22 -0400
committerJames Laska <jlaska@redhat.com>2011-06-17 10:52:22 -0400
commitf9848188e3c93dd21f8e8ece1a96574798e0b7c6 (patch)
tree76272ec2caae433105b2ddc23eb6b1395c542795
parent64fa2f2d501940bee516e89c7dcc674b0e9e0506 (diff)
downloadscripts-f9848188e3c93dd21f8e8ece1a96574798e0b7c6.tar.gz
scripts-f9848188e3c93dd21f8e8ece1a96574798e0b7c6.tar.xz
scripts-f9848188e3c93dd21f8e8ece1a96574798e0b7c6.zip
Use existing opts.namespaces instead of only_cat
-rwxr-xr-x get-mediawiki-data 31
1 files changed, 16 insertions, 15 deletions
diff --git a/get-mediawiki-data b/get-mediawiki-data
index 7629be0..da80642 100755
--- a/get-mediawiki-data
+++ b/get-mediawiki-data
@@ -50,9 +50,6 @@ def parse_args():
optgrp.add_option('-c', '--category', dest="categories",
default=[], action="append",
help='Wiki category name to query (accepts multiple values)')
- optgrp.add_option('--only-cat', dest="only_cat",
- default=False, action='store_true',
- help='Only display sub-categories, not individual pages (default: %default)')
parser.add_option_group(optgrp)
# list_usercontribs
@@ -222,7 +219,7 @@ def getraw(wiki, titles):
return rev.get('*','')
return ''
-def list_categorymembers(wiki, cat_page, limit=5, only_cat=False):
+def list_categorymembers(wiki, cat_page, limit=5, namespaces=''):
'''Return a list of pages belonging to category page'''
# Add 'Category:' prefix if not given
if not cat_page.startswith("Category:"):
@@ -231,7 +228,11 @@ def list_categorymembers(wiki, cat_page, limit=5, only_cat=False):
# Build query arguments and call wiki
query = dict(action='query',
list='categorymembers',
+ cmlimit=50,
cmtitle=cat_page)
+ if namespaces != '':
+ query['cmnamespace'] = namespaces
+
if opts.debug: print query
response = wiki.call(query)
if opts.debug: print response
@@ -241,14 +242,14 @@ def list_categorymembers(wiki, cat_page, limit=5, only_cat=False):
# If necessary, repeatedly call the server to get more data
while response.has_key('query-continue'):
# get category member page names (limit to sub-categories if requested)
- members.extend( [entry.get('title') for entry in response.get('query',{}).get('categorymembers',{}) if entry.has_key('title') and (not only_cat or entry.get('title','').startswith('Category:'))] )
+ members.extend( [entry.get('title') for entry in response.get('query',{}).get('categorymembers',{}) if entry.has_key('title')] )
query['cmcontinue'] = response['query-continue']['categorymembers']['cmcontinue']
if opts.debug: print query
response = wiki.call(query)
if opts.debug: print response
# Extract any remaining data from the response
- members.extend( [entry.get('title') for entry in response.get('query',{}).get('categorymembers',{}) if entry.has_key('title') and (not only_cat or entry.get('title','').startswith('Category:'))] )
+ members.extend( [entry.get('title') for entry in response.get('query',{}).get('categorymembers',{}) if entry.has_key('title')] )
# Determine whether we need to recurse
idx = 0
@@ -257,10 +258,8 @@ def list_categorymembers(wiki, cat_page, limit=5, only_cat=False):
break
# Recurse?
if members[idx].startswith('Category:') and limit > 0:
- members.extend(list_categorymembers(wiki, members[idx], limit-1, only_cat))
- members.remove(members[idx]) # remove Category from list
- else:
- idx += 1
+ members.extend(list_categorymembers(wiki, members[idx], limit-1, namespaces))
+ idx += 1
return members
@@ -271,8 +270,13 @@ if __name__ == "__main__":
wiki = MediaWiki(opts.url)
if action == 'categorymembers':
+ ns_ids = ''
+ if len(opts.namespaces) > 0:
+ ns_ids = list()
+ namespaces = list_namespaces(wiki)
+ ns_ids = '|'.join([namespaces[ns] for ns in opts.namespaces if namespaces.has_key(ns)])
for cat_page in opts.categories:
- pages = list_categorymembers(wiki, cat_page, opts.limit, opts.only_cat)
+ pages = list_categorymembers(wiki, cat_page, opts.limit, ns_ids)
if pages:
print "\n".join(pages)
else:
@@ -284,10 +288,7 @@ if __name__ == "__main__":
if len(opts.namespaces) > 0:
ns_ids = list()
namespaces = list_namespaces(wiki)
- for ns in opts.namespaces:
- if namespaces.has_key(ns):
- ns_ids.append(namespaces[ns])
- ns_ids = "|".join(ns_ids)
+ ns_ids = '|'.join([namespaces[ns] for ns in opts.namespaces if namespaces.has_key(ns)])
# Gather data
user_edits = dict()