diff options
author | Aurélien Bompard <aurelien@bompard.org> | 2012-09-03 16:19:26 +0200 |
---|---|---|
committer | Aurélien Bompard <aurelien@bompard.org> | 2012-09-07 10:41:51 +0200 |
commit | d7b70ee351cb7a26b7c7ee9400d8ef3491166373 (patch) | |
tree | be72470c2677eedae02ec30dedf7129c0e26988d | |
parent | 4deab0e4779217dd0f82ba9beaea18b40ed31933 (diff) | |
download | kittystore-d7b70ee351cb7a26b7c7ee9400d8ef3491166373.tar.gz kittystore-d7b70ee351cb7a26b7c7ee9400d8ef3491166373.tar.xz kittystore-d7b70ee351cb7a26b7c7ee9400d8ef3491166373.zip |
Improve message loading
-rw-r--r-- | get_mbox.py | 12 | ||||
-rw-r--r-- | to_sqldb.py | 3 |
2 files changed, 11 insertions, 4 deletions
```diff
diff --git a/get_mbox.py b/get_mbox.py
index 0576e31..4c611d8 100644
--- a/get_mbox.py
+++ b/get_mbox.py
@@ -4,9 +4,10 @@ import itertools
 import urlgrabber
 import gzip
 import sys
+import os
 from multiprocessing import Pool
 
-years = [2010, 2011, 2012, 2009, 2008, 2007, 2006, 2005, 2004, 2003, 2002]
+years = range(2002, 2013)
 months = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']
 
 
@@ -19,6 +20,9 @@ def archive_downloader(i):
     if not list_name or not year or not month:
         return
     basename = "{0}-{1}.txt.gz".format(year, month)
+    if os.path.exists(basename):
+        print "{0} already downloaded, skipping".format(basename)
+        return
     filename = "http://lists.fedoraproject.org/pipermail/{0}/{1}".format(
         list_name, basename)
     try:
@@ -30,8 +34,10 @@ def archive_downloader(i):
         with open(newname, "w") as f:
             f.write(gzip.open(basename).read())
         print "== {0} downloaded ==".format(filename)
-    except urlgrabber.grabber.URLGrabError:
-        pass
+    except urlgrabber.grabber.URLGrabError, e:
+        print e
+        if e.errno == 14: # 404
+            os.remove(basename)
 
 
 if __name__ == "__main__":
diff --git a/to_sqldb.py b/to_sqldb.py
index 73ce615..add297e 100644
--- a/to_sqldb.py
+++ b/to_sqldb.py
@@ -19,6 +19,7 @@ from kittystore import get_store
 
 TOTALCNT = 0
 #DB_URL = 'postgres://mm3:mm3@localhost/mm3'
+#DB_URL = 'postgres://kittystore:kittystore@localhost/kittystore'
 DB_URL = 'sqlite:///' + os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "kittystore.sqlite"))
 
 
@@ -71,7 +72,7 @@ python to_sqldb.py list_name mbox_file [mbox_file]'''
     else:
         print 'Adding to database list: %s' % sys.argv[1]
 
-        store = get_store(DB_URL)
+        store = get_store(DB_URL, debug=False)
         for mbfile in sys.argv[2:]:
             print mbfile
             if os.path.exists(mbfile):
```