diff options
author | Aurélien Bompard <aurelien@bompard.org> | 2012-11-27 17:28:24 +0100 |
---|---|---|
committer | Aurélien Bompard <aurelien@bompard.org> | 2012-11-28 09:58:31 +0100 |
commit | 6f95223feffe43b14e1be4f70ef77bb5f0590f7e (patch) | |
tree | 56a7f591302a7f7797fdb7bfac101d4bdfaa4212 /kittystore | |
parent | 35204016d043c9d2623f163ba7b7f37d6da207ab (diff) | |
download | kittystore-6f95223feffe43b14e1be4f70ef77bb5f0590f7e.tar.gz kittystore-6f95223feffe43b14e1be4f70ef77bb5f0590f7e.tar.xz kittystore-6f95223feffe43b14e1be4f70ef77bb5f0590f7e.zip |
Package the get_mbox script as a proper generic script
Diffstat (limited to 'kittystore')
-rw-r--r-- | kittystore/scripts.py | 72 |
1 files changed, 72 insertions, 0 deletions
diff --git a/kittystore/scripts.py b/kittystore/scripts.py
index d882d9a..0a21792 100644
--- a/kittystore/scripts.py
+++ b/kittystore/scripts.py
@@ -29,6 +29,10 @@ from optparse import OptionParser
 from kittystore import get_store
 
 
+#
+# Manual database update
+#
+
 def updatedb():
     parser = OptionParser(usage="%prog -s store_url")
     parser.add_option("-s", "--store", help="the URL to the store database")
@@ -47,3 +51,71 @@ def updatedb():
                 "ORDER BY version DESC LIMIT 1"
                 ))[0][0]
     print "Done, the current schema version is %d." % version
+
+
+#
+# Mailman 2 archives downloader
+#
+
+import os
+import urllib2
+import gzip
+import itertools
+from multiprocessing import Pool
+from datetime import date
+
+MONTHS = ['January', 'February', 'March', 'April', 'May', 'June', 'July',
+          'August', 'September', 'October', 'November', 'December']
+
+def dl_archives():
+    parser = OptionParser(usage="%prog -u URL -l LIST_NAME [-d destdir]")
+    parser.add_option("-u", "--url", help="URL to the mailman installation")
+    parser.add_option("-l", "--list-name", help="mailing-list name")
+    parser.add_option("-d", "--destination", default=os.getcwd(),
+                      help="directory to download the archives to. Defaults "
+                           "to the current directory (%default)")
+    parser.add_option("-s", "--start", default="2002",
+                      help="first year to start looking for archives")
+    parser.add_option("-v", "--verbose", action="store_true",
+                      help="show more information")
+    opts, args = parser.parse_args()
+    if not opts.url:
+        parser.error("an URL must be provided")
+    if not opts.list_name:
+        parser.error("a list name must be provided")
+    if "@" in opts.list_name:
+        opts.list_name = opts.list_name[:opts.list_name.index("@")]
+    years = range(int(opts.start), date.today().year + 1)
+    p = Pool(5)
+    p.map(_archive_downloader, itertools.product([opts], years, MONTHS))
+
+def _archive_downloader(args):
+    opts, year, month = args
+    if not year or not month:
+        return
+    basename = "{0}-{1}.txt.gz".format(year, month)
+    filepath = os.path.join(opts.destination, basename)
+    if os.path.exists(filepath):
+        if opts.verbose:
+            print "{0} already downloaded, skipping".format(basename)
+        return
+    url = "{0}/pipermail/{1}/{2}".format(
+            opts.url, opts.list_name, basename)
+    if opts.verbose:
+        print "Downloading from {0}".format(url)
+    try:
+        request = urllib2.urlopen(url)
+        with open(filepath, "w") as f:
+            f.write(request.read())
+    except urllib2.URLError, e:
+        if e.code == 404:
+            print ("This archive hasn't been created on the server yet: "
+                   + basename)
+        else:
+            print e
+        return
+    pos = str(MONTHS.index(month) + 1).rjust(2, "0")
+    newname = '{0}-{1}-{2}-{3}.txt'.format(opts.list_name, year, pos, month)
+    with open(os.path.join(opts.destination, newname), "w") as f:
+        f.write(gzip.open(filepath).read())
+    print "Downloaded archive for {0} {1} from {2}".format(month, year, url)