summaryrefslogtreecommitdiffstats
path: root/kittystore
diff options
context:
space:
mode:
authorAurélien Bompard <aurelien@bompard.org>2012-11-27 17:28:24 +0100
committerAurélien Bompard <aurelien@bompard.org>2012-11-28 09:58:31 +0100
commit6f95223feffe43b14e1be4f70ef77bb5f0590f7e (patch)
tree56a7f591302a7f7797fdb7bfac101d4bdfaa4212 /kittystore
parent35204016d043c9d2623f163ba7b7f37d6da207ab (diff)
downloadkittystore-6f95223feffe43b14e1be4f70ef77bb5f0590f7e.tar.gz
kittystore-6f95223feffe43b14e1be4f70ef77bb5f0590f7e.tar.xz
kittystore-6f95223feffe43b14e1be4f70ef77bb5f0590f7e.zip
Package the get_mbox script as a proper generic script
Diffstat (limited to 'kittystore')
-rw-r--r--kittystore/scripts.py72
1 files changed, 72 insertions, 0 deletions
diff --git a/kittystore/scripts.py b/kittystore/scripts.py
index d882d9a..0a21792 100644
--- a/kittystore/scripts.py
+++ b/kittystore/scripts.py
@@ -29,6 +29,10 @@ from optparse import OptionParser
from kittystore import get_store
+#
+# Manual database update
+#
+
def updatedb():
parser = OptionParser(usage="%prog -s store_url")
parser.add_option("-s", "--store", help="the URL to the store database")
@@ -47,3 +51,71 @@ def updatedb():
"ORDER BY version DESC LIMIT 1"
))[0][0]
print "Done, the current schema version is %d." % version
+
+
+#
+# Mailman 2 archives downloader
+#
+
+import os
+import urllib2
+import gzip
+import itertools
+from multiprocessing import Pool
+from datetime import date
+
+MONTHS = ['January', 'February', 'March', 'April', 'May', 'June', 'July',
+ 'August', 'September', 'October', 'November', 'December']
+
def dl_archives():
    """Download the monthly pipermail archives of a Mailman 2 list.

    Command-line entry point: the options are read from ``sys.argv`` with
    ``OptionParser``. One download task is created for every (year, month)
    combination between the ``--start`` year and the current year, and the
    tasks are handed to ``_archive_downloader`` through a pool of five
    worker processes.

    Invalid or missing options are reported with ``parser.error()``, which
    prints the usage message and exits with status 2.
    """
    parser = OptionParser(usage="%prog -u URL -l LIST_NAME [-d destdir]")
    parser.add_option("-u", "--url", help="URL to the mailman installation")
    parser.add_option("-l", "--list-name", help="mailing-list name")
    parser.add_option("-d", "--destination", default=os.getcwd(),
                      help="directory to download the archives to. Defaults "
                           "to the current directory (%default)")
    parser.add_option("-s", "--start", default="2002",
                      help="first year to start looking for archives")
    parser.add_option("-v", "--verbose", action="store_true",
                      help="show more information")
    opts, args = parser.parse_args()
    if not opts.url:
        parser.error("an URL must be provided")
    # Accept a full posting address (list@domain) and keep only the list
    # name. Done before the emptiness check so that "@domain" is rejected.
    opts.list_name = (opts.list_name or "").partition("@")[0]
    if not opts.list_name:
        parser.error("a list name must be provided")
    try:
        first_year = int(opts.start)
    except ValueError:
        parser.error("the start year must be an integer, got %r" % opts.start)
    # Fail once here rather than once per task inside the workers.
    if not os.path.isdir(opts.destination):
        parser.error("the destination directory does not exist: %s"
                     % opts.destination)
    # The workers append "/pipermail/..." themselves: avoid a double slash.
    opts.url = opts.url.rstrip("/")
    years = range(first_year, date.today().year + 1)
    pool = Pool(5)
    try:
        pool.map(_archive_downloader,
                 itertools.product([opts], years, MONTHS))
    finally:
        # map() only returns once every task is done, so terminating here
        # is harmless on success and stops the workers promptly on error.
        pool.terminate()
        pool.join()
+
+def _archive_downloader(args):
+ opts, year, month = args
+ if not year or not month:
+ return
+ basename = "{0}-{1}.txt.gz".format(year, month)
+ filepath = os.path.join(opts.destination, basename)
+ if os.path.exists(filepath):
+ if opts.verbose:
+ print "{0} already downloaded, skipping".format(basename)
+ return
+ url = "{0}/pipermail/{1}/{2}".format(
+ opts.url, opts.list_name, basename)
+ if opts.verbose:
+ print "Downloading from {0}".format(url)
+ try:
+ request = urllib2.urlopen(url)
+ with open(filepath, "w") as f:
+ f.write(request.read())
+ except urllib2.URLError, e:
+ if e.code == 404:
+ print ("This archive hasn't been created on the server yet: "
+ + basename)
+ else:
+ print e
+ return
+ pos = str(MONTHS.index(month) + 1).rjust(2, "0")
+ newname = '{0}-{1}-{2}-{3}.txt'.format(opts.list_name, year, pos, month)
+ with open(os.path.join(opts.destination, newname), "w") as f:
+ f.write(gzip.open(filepath).read())
+ print "Downloaded archive for {0} {1} from {2}".format(month, year, url)