From 64fa2f2d501940bee516e89c7dcc674b0e9e0506 Mon Sep 17 00:00:00 2001 From: James Laska Date: Fri, 17 Jun 2011 10:32:15 -0400 Subject: Add --only-cat option to get-mediawiki-data to only show sub-categories --- get-mediawiki-data | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/get-mediawiki-data b/get-mediawiki-data index cc3b4f1..7629be0 100755 --- a/get-mediawiki-data +++ b/get-mediawiki-data @@ -50,6 +50,9 @@ def parse_args(): optgrp.add_option('-c', '--category', dest="categories", default=[], action="append", help='Wiki category name to query (accepts multiple values)') + optgrp.add_option('--only-cat', dest="only_cat", + default=False, action='store_true', + help='Only display sub-categories, not individual pages (default: %default)') parser.add_option_group(optgrp) # list_usercontribs @@ -219,7 +222,7 @@ def getraw(wiki, titles): return rev.get('*','') return '' -def list_categorymembers(wiki, cat_page, limit=5): +def list_categorymembers(wiki, cat_page, limit=5, only_cat=False): '''Return a list of pages belonging to category page''' # Add 'Category:' prefix if not given if not cat_page.startswith("Category:"): @@ -237,14 +240,15 @@ def list_categorymembers(wiki, cat_page, limit=5): members = list() # If necesary, repeatedly call the server to get more data while response.has_key('query-continue'): - members.extend( [entry.get('title') for entry in response.get('query',{}).get('categorymembers',{}) if entry.has_key('title')] ) + # get category member page names (limit to sub-categories if requested) + members.extend( [entry.get('title') for entry in response.get('query',{}).get('categorymembers',{}) if entry.has_key('title') and (not only_cat or entry.get('title','').startswith('Category:'))] ) query['cmcontinue'] = response['query-continue']['categorymembers']['cmcontinue'] if opts.debug: print query response = wiki.call(query) if opts.debug: print response # Extract any remaining data from the response - members.extend( 
[entry.get('title') for entry in response.get('query',{}).get('categorymembers',{}) if entry.has_key('title')] ) + members.extend( [entry.get('title') for entry in response.get('query',{}).get('categorymembers',{}) if entry.has_key('title') and (not only_cat or entry.get('title','').startswith('Category:'))] ) # Determine whether we need to recurse idx = 0 @@ -253,7 +257,7 @@ def list_categorymembers(wiki, cat_page, limit=5): break # Recurse? if members[idx].startswith('Category:') and limit > 0: - members.extend(list_categorymembers(wiki, members[idx], limit-1)) + members.extend(list_categorymembers(wiki, members[idx], limit-1, only_cat)) members.remove(members[idx]) # remove Category from list else: idx += 1 @@ -268,7 +272,7 @@ if __name__ == "__main__": if action == 'categorymembers': for cat_page in opts.categories: - pages = list_categorymembers(wiki, cat_page, opts.limit) + pages = list_categorymembers(wiki, cat_page, opts.limit, opts.only_cat) if pages: print "\n".join(pages) else: -- cgit