summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Aurélien Bompard <aurelien@bompard.org> 2012-09-26 13:58:22 +0200
committer Aurélien Bompard <aurelien@bompard.org> 2012-09-26 16:20:25 +0200
commit c6ab287d319d3e9fb3d6f009ffc0c8d1017c9721 (patch)
tree 9cab09c6bdc347382cd2a0757be6642d06b96c47
parent ca1967c915458c7e6b54a43767a8b50dea277fb9 (diff)
download kittystore-c6ab287d319d3e9fb3d6f009ffc0c8d1017c9721.tar.gz
kittystore-c6ab287d319d3e9fb3d6f009ffc0c8d1017c9721.tar.xz
kittystore-c6ab287d319d3e9fb3d6f009ffc0c8d1017c9721.zip
Integrate the scrubber into the store
-rw-r--r-- kittystore/scrub.py 11
-rw-r--r-- kittystore/storm/model.py 16
-rw-r--r-- kittystore/storm/store.py 49
-rw-r--r-- kittystore/utils.py 28
4 files changed, 64 insertions, 40 deletions
diff --git a/kittystore/scrub.py b/kittystore/scrub.py
index e7f4dde..a463ac3 100644
--- a/kittystore/scrub.py
+++ b/kittystore/scrub.py
@@ -301,12 +301,15 @@ class Scrubber(object):
except (UnicodeError, LookupError, ValueError,
AssertionError):
pass
- replace_payload_by_text(self.msg, sep.join(text), charset)
+ text = sep.join(text)
+ del self.msg['content-type']
+ del self.msg['content-transfer-encoding']
+ self.msg.set_payload(text, charset)
if format:
self.msg.set_param('Format', format)
if delsp:
self.msg.set_param('DelSp', delsp)
- return self.msg
+ return text.decode(charset)
def save_attachment(self, part, counter, filter_html=True):
@@ -366,4 +369,6 @@ class Scrubber(object):
# BAW: I'm sure we can eventually do better than this. :(
decodedpayload = websafe(str(submsg))
msg_id = self.msg['Message-Id'].strip("<>")
- self.store.add_attachment(self.mlist, msg_id, counter, decodedpayload)
+ self.store.add_attachment(
+ self.mlist, msg_id, counter, filebase+ext,
+ ctype, decodedpayload)
diff --git a/kittystore/storm/model.py b/kittystore/storm/model.py
index 0ef7212..4177fc7 100644
--- a/kittystore/storm/model.py
+++ b/kittystore/storm/model.py
@@ -22,7 +22,7 @@ from kittystore.utils import get_message_id_hash
from .hack_datetime import DateTime
-__all__ = ("List", "Email",)
+__all__ = ("List", "Email", "Attachment")
class List(object):
@@ -71,3 +71,17 @@ class Email(object):
self.list_name = unicode(list_name)
self.message_id = unicode(message_id)
self.message_id_hash = unicode(get_message_id_hash(self.message_id))
+
+
+class Attachment(object):
+
+ __storm_table__ = "attachment"
+ __storm_primary__ = "list_name", "message_id", "counter"
+
+ list_name = Unicode()
+ message_id = Unicode()
+ counter = Int()
+ name = Unicode()
+ content_type = Unicode()
+ size = Int()
+ content = RawStr()
diff --git a/kittystore/storm/store.py b/kittystore/storm/store.py
index ebec34f..a78a23c 100644
--- a/kittystore/storm/store.py
+++ b/kittystore/storm/store.py
@@ -18,15 +18,16 @@ import datetime
from kittystore import MessageNotFound
from kittystore.utils import parseaddr, parsedate
-from kittystore.utils import header_to_unicode, payload_to_unicode
-from kittystore.scrub import scrub_message
+from kittystore.utils import header_to_unicode
+from kittystore.scrub import Scrubber
from kittystore.utils import get_ref_and_thread_id
from zope.interface import implements
from mailman.interfaces.messages import IMessageStore
from storm.locals import *
+from storm.expr import And, Or
-from .model import List, Email
+from .model import List, Email, Attachment
class StormStore(object):
@@ -78,14 +79,15 @@ class StormStore(object):
The storage service is also allowed to raise this exception
if it finds, but disallows, collisions.
"""
+ list_name = unicode(list_name)
# Create the list if it does not exist
list_is_in_db = self.db.find(List,
- List.name == unicode(list_name)).count()
+ List.name == list_name).count()
if not list_is_in_db:
self.db.add(List(list_name))
if not message.has_key("Message-Id"):
raise ValueError("No 'Message-Id' header in email", message)
- msg_id = message['Message-Id'].strip("<>")
+ msg_id = unicode(message['Message-Id'].strip("<>"))
email = Email(list_name, msg_id)
if self.is_message_in_list(list_name, email.message_id):
print ("Duplicate email from %s: %s" %
@@ -105,13 +107,13 @@ class StormStore(object):
email.sender_name = from_name
email.sender_email = unicode(from_email)
email.subject = header_to_unicode(message.get('Subject'))
- payload = payload_to_unicode(scrub_message(list_name, message))
- email.content = payload
+ email.full = message.as_string() # Before scrubbing
+ scrubber = Scrubber(list_name, message, self)
+ email.content = scrubber.scrub()
email.date = parsedate(message.get("Date"))
if email.date is None:
# Absent or unparseable date
email.date = datetime.datetime.now()
- email.full = message.as_string()
#category = 'Question' # TODO: enum + i18n ?
#if ('agenda' in message.get('Subject', '').lower() or
@@ -360,6 +362,37 @@ class StormStore(object):
)).config(distinct=True)
return list(participants)
+ # Attachments
+
+ def add_attachment(self, mlist, msg_id, counter, name, content_type,
+ content):
+ attachment = Attachment()
+ attachment.list_name = unicode(mlist)
+ attachment.message_id = unicode(msg_id)
+ attachment.counter = counter
+ attachment.name = unicode(name)
+ attachment.content_type = unicode(content_type)
+ attachment.content = content
+ attachment.size = len(content)
+ self.db.add(attachment)
+ self.flush()
+
+ def get_attachments(self, list_name, message_id):
+ """Return the message's attachments
+
+ :param list_name: The fully qualified list name to which the
+ message belongs.
+ :param message_id: The Message-ID header contents to search for.
+ :returns: A list of attachments
+ """
+ att = self.db.find(Attachment, And(
+ Attachment.list_name == unicode(list_name),
+ Attachment.message_id == unicode(message_id)
+ )).order_by(Attachment.counter)
+ return list(att)
+
+ # Generic database operations
+
def flush(self):
"""Flush pending database operations."""
self.db.flush()
diff --git a/kittystore/utils.py b/kittystore/utils.py
index af3a150..49c4b29 100644
--- a/kittystore/utils.py
+++ b/kittystore/utils.py
@@ -74,34 +74,6 @@ def header_to_unicode(header):
h_decoded.append(decoded.decode("ascii", "replace"))
return "".join(h_decoded)
-def payload_to_unicode(message):
- # Turn non-ascii into Unicode, assuming UTF-8
- payload = []
- for part in message.walk():
- if part.get_content_type() != "text/plain":
- continue # TODO: handle HTML messages and attachments
- part_payload = part.get_payload()
- if part.get_content_charset() is None:
- for encoding in ["ascii", "utf-8", "iso-8859-15"]:
- try:
- part_payload = part_payload.decode(encoding)
- except UnicodeDecodeError:
- continue
- else:
- #print encoding, payload
- break
- # Try UTF-8
- #part.set_charset("utf-8")
- #try:
- # payload.append(part.get_payload().decode("utf-8"))
- #except UnicodeDecodeError, e:
- # print e
- # print message.items()
- # print part.get_payload()
- # raise
- payload.append(unicode(part_payload))
- return unicode("".join(payload))
-
def parsedate(datestring):
if datestring is None:
return None