summary | refs | log | tree | commit | diff | stats
path: root/kittystore
diff options
context:
space:
mode:
author: Aurélien Bompard <aurelien@bompard.org> 2012-11-22 17:07:48 +0100
committer: Aurélien Bompard <aurelien@bompard.org> 2012-11-22 17:07:48 +0100
commit: 0f2952a26dffeb9ac8e53ee78981c9769d0cd8cd (patch)
tree: 0ebeabb1e3c680e109b777a0055c4c7830298815 /kittystore
parent: 5e0d7c3fc57e3917025fd44bb4f0cf9461a677d1 (diff)
download: kittystore-0f2952a26dffeb9ac8e53ee78981c9769d0cd8cd.tar.gz
kittystore-0f2952a26dffeb9ac8e53ee78981c9769d0cd8cd.tar.xz
kittystore-0f2952a26dffeb9ac8e53ee78981c9769d0cd8cd.zip
Behave properly when an attachment can't be downloaded
Diffstat (limited to 'kittystore')
-rw-r--r-- kittystore/import.py 53
1 files changed, 33 insertions, 20 deletions
diff --git a/kittystore/import.py b/kittystore/import.py
index 29a0b7a..94015b6 100644
--- a/kittystore/import.py
+++ b/kittystore/import.py
@@ -90,6 +90,9 @@ class DummyMailingList(object):
self.display_name = None
+class DownloadError(Exception): pass
+
+
class DbImporter(object):
"""
Import email messages into the KittyStore database using its API.
@@ -135,6 +138,14 @@ class DbImporter(object):
str(randint(0, 100))))
print("Found duplicate, changing message id from %s to %s"
% (oldmsgid, message["Message-Id"]))
+ # Parse message to search for attachments
+ try:
+ attachments = self.extract_attachments(message)
+ except DownloadError, e:
+ print ("Could not download one of the attachments! "
+ "Skipping this message. Error: %s" % e.args[0])
+ continue
+ # Now insert the message
try:
self.store.add_to_list(self.mlist, message)
except ValueError, e:
@@ -143,8 +154,12 @@ class DbImporter(object):
print "%s from %s about %s" % (e.args[0],
e.args[1].get("From"), e.args[1].get("Subject"))
continue
- # Parse message to search for attachments
- self.extract_attachments(message)
+ # And insert the attachments (numbered from 1, as the removed code did)
+ for counter, att in enumerate(attachments, 1):
+ self.store.add_attachment(
+ self.mlist.fqdn_listname,
+ message["Message-Id"].strip(" <>"),
+ counter, att[0], att[1], None, att[2])
self.store.flush()
cnt_imported += 1
@@ -155,44 +170,40 @@ class DbImporter(object):
def extract_attachments(self, message):
"""Parse message to search for attachments"""
+ all_attachments = []
message_text = message.as_string()
- counter = 0
#has_attach = False
#if "-------------- next part --------------" in message_text:
# has_attach = True
# Regular attachments
attachments = ATTACHMENT_RE.findall(message_text)
for att in attachments:
- counter += 1
- self.download_attachment(message["Message-Id"], counter,
- att[0], att[1], att[2])
+ all_attachments.append( (att[0], att[1],
+ self.download_attachment(att[2])) )
# Embedded messages
embedded = EMBEDDED_MSG_RE.findall(message_text)
for att in embedded:
- counter += 1
- self.download_attachment(message["Message-Id"], counter,
- att[0], 'message/rfc822', att[1])
+ all_attachments.append( (att[0], 'message/rfc822',
+ self.download_attachment(att[1])) )
# HTML attachments
html_attachments = HTML_ATTACH_RE.findall(message_text)
for att in html_attachments:
- counter += 1
url = att.strip("<>")
- self.download_attachment(message["Message-Id"], counter,
- os.path.basename(url), 'text/html', url)
+ all_attachments.append( (os.path.basename(url), 'text/html',
+ self.download_attachment(url)) )
# Text without charset
text_no_charset = TEXT_NO_CHARSET_RE.findall(message_text)
for att in text_no_charset:
- counter += 1
- self.download_attachment(message["Message-Id"], counter,
- att[0], 'text/plain', att[1])
+ all_attachments.append( (att[0], 'text/plain',
+ self.download_attachment(att[1])) )
## Other, probably inline text/plain
#if has_attach and not (attachments or embedded
# or html_attachments or text_no_charset):
# print message_text
+ return all_attachments
- def download_attachment(self, message_id, counter, name, ctype, url):
+ def download_attachment(self, url):
url = url.strip(" <>")
- message_id = message_id.strip(" <>")
if self.no_download:
if self.verbose:
print "NOT downloading attachment from %s" % url
@@ -200,9 +211,11 @@ class DbImporter(object):
else:
if self.verbose:
print "Downloading attachment from %s" % url
- content = urllib.urlopen(url).read()
- self.store.add_attachment(self.mlist.fqdn_listname, message_id,
- counter, name, ctype, None, content)
+ try:
+ content = urllib.urlopen(url).read()
+ except IOError, e:
+ raise DownloadError(e)
+ return content
def parse_args():