From f9848188e3c93dd21f8e8ece1a96574798e0b7c6 Mon Sep 17 00:00:00 2001 From: James Laska Date: Fri, 17 Jun 2011 10:52:22 -0400 Subject: Use existing opts.namespace instead of only_cat --- get-mediawiki-data | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/get-mediawiki-data b/get-mediawiki-data index 7629be0..da80642 100755 --- a/get-mediawiki-data +++ b/get-mediawiki-data @@ -50,9 +50,6 @@ def parse_args(): optgrp.add_option('-c', '--category', dest="categories", default=[], action="append", help='Wiki category name to query (accepts multiple values)') - optgrp.add_option('--only-cat', dest="only_cat", - default=False, action='store_true', - help='Only display sub-categories, not individual pages (default: %default)') parser.add_option_group(optgrp) # list_usercontribs @@ -222,7 +219,7 @@ def getraw(wiki, titles): return rev.get('*','') return '' -def list_categorymembers(wiki, cat_page, limit=5, only_cat=False): +def list_categorymembers(wiki, cat_page, limit=5, namespaces=''): '''Return a list of pages belonging to category page''' # Add 'Category:' prefix if not given if not cat_page.startswith("Category:"): @@ -231,7 +228,11 @@ def list_categorymembers(wiki, cat_page, limit=5, only_cat=False): # Build query arguments and call wiki query = dict(action='query', list='categorymembers', + cmlimit=50, cmtitle=cat_page) + if namespaces != '': + query['cmnamespace'] = namespaces + if opts.debug: print query response = wiki.call(query) if opts.debug: print response @@ -241,14 +242,14 @@ def list_categorymembers(wiki, cat_page, limit=5, only_cat=False): # If necesary, repeatedly call the server to get more data while response.has_key('query-continue'): # get category member page names (limit to sub-categories if requested) - members.extend( [entry.get('title') for entry in response.get('query',{}).get('categorymembers',{}) if entry.has_key('title') and (not only_cat or entry.get('title','').startswith('Category:'))] ) + members.extend( [entry.get('title') for entry in response.get('query',{}).get('categorymembers',{}) if entry.has_key('title')] ) query['cmcontinue'] = response['query-continue']['categorymembers']['cmcontinue'] if opts.debug: print query response = wiki.call(query) if opts.debug: print response # Extract any remaining data from the response - members.extend( [entry.get('title') for entry in response.get('query',{}).get('categorymembers',{}) if entry.has_key('title') and (not only_cat or entry.get('title','').startswith('Category:'))] ) + members.extend( [entry.get('title') for entry in response.get('query',{}).get('categorymembers',{}) if entry.has_key('title')] ) # Determine whether we need to recurse idx = 0 @@ -257,10 +258,8 @@ def list_categorymembers(wiki, cat_page, limit=5, only_cat=False): break # Recurse? if members[idx].startswith('Category:') and limit > 0: - members.extend(list_categorymembers(wiki, members[idx], limit-1, only_cat)) - members.remove(members[idx]) # remove Category from list - else: - idx += 1 + members.extend(list_categorymembers(wiki, members[idx], limit-1, namespaces)) + idx += 1 return members @@ -271,8 +270,13 @@ if __name__ == "__main__": wiki = MediaWiki(opts.url) if action == 'categorymembers': + ns_ids = '' + if len(opts.namespaces) > 0: + ns_ids = list() + namespaces = list_namespaces(wiki) + ns_ids = '|'.join([namespaces[ns] for ns in opts.namespaces if namespaces.has_key(ns)]) for cat_page in opts.categories: - pages = list_categorymembers(wiki, cat_page, opts.limit, opts.only_cat) + pages = list_categorymembers(wiki, cat_page, opts.limit, ns_ids) if pages: print "\n".join(pages) else: @@ -284,10 +288,7 @@ if __name__ == "__main__": if len(opts.namespaces) > 0: ns_ids = list() namespaces = list_namespaces(wiki) - for ns in opts.namespaces: - if namespaces.has_key(ns): - ns_ids.append(namespaces[ns]) - ns_ids = "|".join(ns_ids) + ns_ids = '|'.join([namespaces[ns] for ns in opts.namespaces if namespaces.has_key(ns)]) # Gather data user_edits = dict() -- cgit