diff options
| author | Aurélien Bompard <aurelien@bompard.org> | 2012-10-26 08:35:06 +0200 |
|---|---|---|
| committer | Aurélien Bompard <aurelien@bompard.org> | 2012-10-26 08:35:06 +0200 |
| commit | 3c98fc52c8238a5c2c6299b499ca5f413c533476 (patch) | |
| tree | c76f4fa82589cdf7fa9ba15cae76f54ddc4dc3a9 | |
| parent | 60fb8886239f1076773147016472bcfb1633f8a2 (diff) | |
| download | kittystore-3c98fc52c8238a5c2c6299b499ca5f413c533476.tar.gz kittystore-3c98fc52c8238a5c2c6299b499ca5f413c533476.tar.xz kittystore-3c98fc52c8238a5c2c6299b499ca5f413c533476.zip | |
Import: make it possible to import duplicate emails
Duplicates are imported by changing their Message-ID header. This is useful to keep
compatibility with former pipermail URLs, which use the message number
as an identifier. If the pipermail archives contain duplicate emails,
skipping them would mess up the numbering and thus the URLs.
| -rw-r--r-- | kittystore/import.py | 29 |
1 files changed, 22 insertions, 7 deletions
diff --git a/kittystore/import.py b/kittystore/import.py index 5355265..e63927a 100644 --- a/kittystore/import.py +++ b/kittystore/import.py @@ -36,13 +36,15 @@ from dateutil import tz from kitchen.text.converters import to_bytes from hashlib import sha1 from optparse import OptionParser +from random import randint +from email.utils import unquote from kittystore import get_store #KITTYSTORE_URL = 'postgres://mm3:mm3@localhost/mm3' #KITTYSTORE_URL = 'postgres://kittystore:kittystore@localhost/kittystore' -KITTYSTORE_URL = 'sqlite:///' + os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "kittystore.sqlite")) +KITTYSTORE_URL = 'sqlite:///' + os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "kittystore.sqlite")) PREFIX_RE = re.compile("^\[([\w\s_-]+)\] ") @@ -100,10 +102,11 @@ class DbImporter(object): Import email messages into the KittyStore database using its API. """ - def __init__(self, mlist, store): + def __init__(self, mlist, store, force_import=False): self.mlist = mlist self.store = store self.total_imported = 0 + self.force_import = force_import def from_mbox(self, mbfile): """ Upload all the emails in a mbox file into the database using @@ -123,6 +126,14 @@ class DbImporter(object): subject_prefix = PREFIX_RE.search(message["subject"]) if subject_prefix: self.mlist.display_name = unicode(subject_prefix.group(1)) + if self.force_import: + while self.store.is_message_in_list( + self.mlist.fqdn_listname, unquote(message["Message-Id"])): + print "Found duplicate, changing message id from", message["Message-Id"], "to", + message.replace_header("Message-Id", + "<%s-%s>" % (unquote(message["Message-Id"]), + str(randint(0, 100)))) + print message["Message-Id"] try: msg_id_hash = self.store.add_to_list(self.mlist, message) except ValueError, e: @@ -183,9 +194,13 @@ def parse_args(): parser = OptionParser(usage=usage) parser.add_option("-l", "--list-name", help="the fully-qualified list " "name (including the '@' symbol and the domain 
name") - parser.add_option("-v", "--verbose", help="show more output") - parser.add_option("-D", "--duplicates", help="do not skip duplicate emails " - "(same Message-ID header), import them with a different Message-ID") + parser.add_option("-v", "--verbose", action="store_true", + help="show more output") + parser.add_option("-d", "--debug", action="store_true", + help="show a whole lot more of output") + parser.add_option("-D", "--duplicates", action="store_true", + help="do not skip duplicate emails (same Message-ID header), " + "import them with a different Message-ID") opts, args = parser.parse_args() if opts.list_name is None: parser.error("the list name must be given on the command-line.") @@ -203,9 +218,9 @@ def parse_args(): def main(): opts, args = parse_args() print 'Importing messages from %s to database...' % opts.list_name - store = get_store(KITTYSTORE_URL, debug=False) + store = get_store(KITTYSTORE_URL, debug=opts.debug) mlist = DummyMailingList(opts.list_name) - importer = DbImporter(mlist, store) + importer = DbImporter(mlist, store, force_import=opts.duplicates) for mbfile in args: print "Importing from mbox file %s" % mbfile importer.from_mbox(mbfile) |
