diff options
author | Aurélien Bompard <aurelien@bompard.org> | 2012-09-26 13:58:22 +0200 |
---|---|---|
committer | Aurélien Bompard <aurelien@bompard.org> | 2012-09-26 16:20:25 +0200 |
commit | c6ab287d319d3e9fb3d6f009ffc0c8d1017c9721 (patch) | |
tree | 9cab09c6bdc347382cd2a0757be6642d06b96c47 | |
parent | ca1967c915458c7e6b54a43767a8b50dea277fb9 (diff) | |
download | kittystore-c6ab287d319d3e9fb3d6f009ffc0c8d1017c9721.tar.gz kittystore-c6ab287d319d3e9fb3d6f009ffc0c8d1017c9721.tar.xz kittystore-c6ab287d319d3e9fb3d6f009ffc0c8d1017c9721.zip |
Integrate the scrubber into the store
-rw-r--r-- | kittystore/scrub.py | 11 | ||||
-rw-r--r-- | kittystore/storm/model.py | 16 | ||||
-rw-r--r-- | kittystore/storm/store.py | 49 | ||||
-rw-r--r-- | kittystore/utils.py | 28 |
4 files changed, 64 insertions, 40 deletions
diff --git a/kittystore/scrub.py b/kittystore/scrub.py index e7f4dde..a463ac3 100644 --- a/kittystore/scrub.py +++ b/kittystore/scrub.py @@ -301,12 +301,15 @@ class Scrubber(object): except (UnicodeError, LookupError, ValueError, AssertionError): pass - replace_payload_by_text(self.msg, sep.join(text), charset) + text = sep.join(text) + del self.msg['content-type'] + del self.msg['content-transfer-encoding'] + self.msg.set_payload(text, charset) if format: self.msg.set_param('Format', format) if delsp: self.msg.set_param('DelSp', delsp) - return self.msg + return text.decode(charset) def save_attachment(self, part, counter, filter_html=True): @@ -366,4 +369,6 @@ class Scrubber(object): # BAW: I'm sure we can eventually do better than this. :( decodedpayload = websafe(str(submsg)) msg_id = self.msg['Message-Id'].strip("<>") - self.store.add_attachment(self.mlist, msg_id, counter, decodedpayload) + self.store.add_attachment( + self.mlist, msg_id, counter, filebase+ext, + ctype, decodedpayload) diff --git a/kittystore/storm/model.py b/kittystore/storm/model.py index 0ef7212..4177fc7 100644 --- a/kittystore/storm/model.py +++ b/kittystore/storm/model.py @@ -22,7 +22,7 @@ from kittystore.utils import get_message_id_hash from .hack_datetime import DateTime -__all__ = ("List", "Email",) +__all__ = ("List", "Email", "Attachment") class List(object): @@ -71,3 +71,17 @@ class Email(object): self.list_name = unicode(list_name) self.message_id = unicode(message_id) self.message_id_hash = unicode(get_message_id_hash(self.message_id)) + + +class Attachment(object): + + __storm_table__ = "attachment" + __storm_primary__ = "list_name", "message_id", "counter" + + list_name = Unicode() + message_id = Unicode() + counter = Int() + name = Unicode() + content_type = Unicode() + size = Int() + content = RawStr() diff --git a/kittystore/storm/store.py b/kittystore/storm/store.py index ebec34f..a78a23c 100644 --- a/kittystore/storm/store.py +++ b/kittystore/storm/store.py @@ -18,15 +18,16 @@ import datetime from kittystore import MessageNotFound from kittystore.utils import parseaddr, parsedate -from kittystore.utils import header_to_unicode, payload_to_unicode -from kittystore.scrub import scrub_message +from kittystore.utils import header_to_unicode +from kittystore.scrub import Scrubber from kittystore.utils import get_ref_and_thread_id from zope.interface import implements from mailman.interfaces.messages import IMessageStore from storm.locals import * +from storm.expr import And, Or -from .model import List, Email +from .model import List, Email, Attachment class StormStore(object): @@ -78,14 +79,15 @@ class StormStore(object): The storage service is also allowed to raise this exception if it find, but disallows collisions. """ + list_name = unicode(list_name) # Create the list if it does not exist list_is_in_db = self.db.find(List, - List.name == unicode(list_name)).count() + List.name == list_name).count() if not list_is_in_db: self.db.add(List(list_name)) if not message.has_key("Message-Id"): raise ValueError("No 'Message-Id' header in email", message) - msg_id = message['Message-Id'].strip("<>") + msg_id = unicode(message['Message-Id'].strip("<>")) email = Email(list_name, msg_id) if self.is_message_in_list(list_name, email.message_id): print ("Duplicate email from %s: %s" % @@ -105,13 +107,13 @@ class StormStore(object): email.sender_name = from_name email.sender_email = unicode(from_email) email.subject = header_to_unicode(message.get('Subject')) - payload = payload_to_unicode(scrub_message(list_name, message)) - email.content = payload + email.full = message.as_string() # Before scrubbing + scrubber = Scrubber(list_name, message, self) + email.content = scrubber.scrub() email.date = parsedate(message.get("Date")) if email.date is None: # Absent or unparseable date email.date = datetime.datetime.now() - email.full = message.as_string() #category = 'Question' # TODO: enum + i18n ? #if ('agenda' in message.get('Subject', '').lower() or @@ -360,6 +362,37 @@ class StormStore(object): )).config(distinct=True) return list(participants) + # Attachments + + def add_attachment(self, mlist, msg_id, counter, name, content_type, + content): + attachment = Attachment() + attachment.list_name = unicode(mlist) + attachment.message_id = unicode(msg_id) + attachment.counter = counter + attachment.name = unicode(name) + attachment.content_type = unicode(content_type) + attachment.content = content + attachment.size = len(content) + self.db.add(attachment) + self.flush() + + def get_attachments(self, list_name, message_id): + """Return the message's attachments + + :param list_name: The fully qualified list name to which the + message should be added. + :param message_id: The Message-ID header contents to search for. + :returns: A list of attachments + """ + att = self.db.find(Attachment, And( + Attachment.list_name == unicode(list_name), + Attachment.message_id == unicode(message_id) + )).order_by(Attachment.counter) + return list(att) + + # Generic database operations + def flush(self): """Flush pending database operations.""" self.db.flush() diff --git a/kittystore/utils.py b/kittystore/utils.py index af3a150..49c4b29 100644 --- a/kittystore/utils.py +++ b/kittystore/utils.py @@ -74,34 +74,6 @@ def header_to_unicode(header): h_decoded.append(decoded.decode("ascii", "replace")) return "".join(h_decoded) -def payload_to_unicode(message): - # Turn non-ascii into Unicode, assuming UTF-8 - payload = [] - for part in message.walk(): - if part.get_content_type() != "text/plain": - continue # TODO: handle HTML messages and attachments - part_payload = part.get_payload() - if part.get_content_charset() is None: - for encoding in ["ascii", "utf-8", "iso-8859-15"]: - try: - part_payload = part_payload.decode(encoding) - except UnicodeDecodeError: - continue - else: - #print encoding, payload - break - # Try UTF-8 - #part.set_charset("utf-8") - #try: - # payload.append(part.get_payload().decode("utf-8")) - #except UnicodeDecodeError, e: - # print e - # print message.items() - # print part.get_payload() - # raise - payload.append(unicode(part_payload)) - return unicode("".join(payload)) - def parsedate(datestring): if datestring is None: return None |