summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAurélien Bompard <aurelien@bompard.org>2012-10-26 08:35:06 +0200
committerAurélien Bompard <aurelien@bompard.org>2012-10-26 08:35:06 +0200
commit3c98fc52c8238a5c2c6299b499ca5f413c533476 (patch)
treec76f4fa82589cdf7fa9ba15cae76f54ddc4dc3a9
parent60fb8886239f1076773147016472bcfb1633f8a2 (diff)
downloadkittystore-3c98fc52c8238a5c2c6299b499ca5f413c533476.tar.gz
kittystore-3c98fc52c8238a5c2c6299b499ca5f413c533476.tar.xz
kittystore-3c98fc52c8238a5c2c6299b499ca5f413c533476.zip
Import: make it possible to import duplicate emails
Duplicate emails are imported by changing their Message-ID header. This is useful to keep compatibility with former pipermail URLs, which use the message number as an identifier. If the pipermail archives contain duplicate emails, skipping them would mess up the numbering and thus the URLs.
-rw-r--r--kittystore/import.py29
1 files changed, 22 insertions, 7 deletions
diff --git a/kittystore/import.py b/kittystore/import.py
index 5355265..e63927a 100644
--- a/kittystore/import.py
+++ b/kittystore/import.py
@@ -36,13 +36,15 @@ from dateutil import tz
from kitchen.text.converters import to_bytes
from hashlib import sha1
from optparse import OptionParser
+from random import randint
+from email.utils import unquote
from kittystore import get_store
#KITTYSTORE_URL = 'postgres://mm3:mm3@localhost/mm3'
#KITTYSTORE_URL = 'postgres://kittystore:kittystore@localhost/kittystore'
-KITTYSTORE_URL = 'sqlite:///' + os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "kittystore.sqlite"))
+KITTYSTORE_URL = 'sqlite:///' + os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "kittystore.sqlite"))
PREFIX_RE = re.compile("^\[([\w\s_-]+)\] ")
@@ -100,10 +102,11 @@ class DbImporter(object):
Import email messages into the KittyStore database using its API.
"""
- def __init__(self, mlist, store):
+ def __init__(self, mlist, store, force_import=False):
self.mlist = mlist
self.store = store
self.total_imported = 0
+ self.force_import = force_import
def from_mbox(self, mbfile):
""" Upload all the emails in a mbox file into the database using
@@ -123,6 +126,14 @@ class DbImporter(object):
subject_prefix = PREFIX_RE.search(message["subject"])
if subject_prefix:
self.mlist.display_name = unicode(subject_prefix.group(1))
+ if self.force_import:
+ while self.store.is_message_in_list(
+ self.mlist.fqdn_listname, unquote(message["Message-Id"])):
+ print "Found duplicate, changing message id from", message["Message-Id"], "to",
+ message.replace_header("Message-Id",
+ "<%s-%s>" % (unquote(message["Message-Id"]),
+ str(randint(0, 100))))
+ print message["Message-Id"]
try:
msg_id_hash = self.store.add_to_list(self.mlist, message)
except ValueError, e:
@@ -183,9 +194,13 @@ def parse_args():
parser = OptionParser(usage=usage)
parser.add_option("-l", "--list-name", help="the fully-qualified list "
"name (including the '@' symbol and the domain name")
- parser.add_option("-v", "--verbose", help="show more output")
- parser.add_option("-D", "--duplicates", help="do not skip duplicate emails "
- "(same Message-ID header), import them with a different Message-ID")
+ parser.add_option("-v", "--verbose", action="store_true",
+ help="show more output")
+ parser.add_option("-d", "--debug", action="store_true",
+ help="show a whole lot more of output")
+ parser.add_option("-D", "--duplicates", action="store_true",
+ help="do not skip duplicate emails (same Message-ID header), "
+ "import them with a different Message-ID")
opts, args = parser.parse_args()
if opts.list_name is None:
parser.error("the list name must be given on the command-line.")
@@ -203,9 +218,9 @@ def parse_args():
def main():
opts, args = parse_args()
print 'Importing messages from %s to database...' % opts.list_name
- store = get_store(KITTYSTORE_URL, debug=False)
+ store = get_store(KITTYSTORE_URL, debug=opts.debug)
mlist = DummyMailingList(opts.list_name)
- importer = DbImporter(mlist, store)
+ importer = DbImporter(mlist, store, force_import=opts.duplicates)
for mbfile in args:
print "Importing from mbox file %s" % mbfile
importer.from_mbox(mbfile)