summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Aurélien Bompard <aurelien@bompard.org> 2012-09-10 17:13:13 +0200
committer: Aurélien Bompard <aurelien@bompard.org> 2012-09-10 17:18:21 +0200
commit: 24be4756ce076063a6a28fefe607ba72b10357c5 (patch)
tree: dcbdab22aa93f546d28d33d4a08204f1d23cad4f
parent: 7fbabe36ce3e80de885484e66adcc42827d8c5f2 (diff)
download: kittystore-24be4756ce076063a6a28fefe607ba72b10357c5.tar.gz
          kittystore-24be4756ce076063a6a28fefe607ba72b10357c5.tar.xz
          kittystore-24be4756ce076063a6a28fefe607ba72b10357c5.zip
Improve date and header parsing
-rw-r--r-- kittystore/storm/store.py 3
-rw-r--r-- kittystore/test/test_storm_store.py (renamed from kittystore/test/test_sa_store.py) 30
-rw-r--r-- kittystore/test/test_utils.py 29
-rw-r--r-- kittystore/test/testdata/wrong-in-reply-to-header.txt 10
-rw-r--r-- kittystore/utils.py 18
5 files changed, 79 insertions, 11 deletions
diff --git a/kittystore/storm/store.py b/kittystore/storm/store.py
index b35843c..0c5b91e 100644
--- a/kittystore/storm/store.py
+++ b/kittystore/storm/store.py
@@ -107,6 +107,9 @@ class StormStore(object):
payload = payload_to_unicode(message)
email.content = payload
email.date = parsedate(message.get("Date"))
+ if email.date is None:
+ # Absent or unparseable date
+ email.date = datetime.datetime.now()
email.full = message.as_string()
#category = 'Question' # TODO: enum + i18n ?
diff --git a/kittystore/test/test_sa_store.py b/kittystore/test/test_storm_store.py
index 69680ae..b917afc 100644
--- a/kittystore/test/test_sa_store.py
+++ b/kittystore/test/test_storm_store.py
@@ -3,19 +3,37 @@
import unittest
import email
import mailbox
+import datetime
-from kittystore.sa.store import KittySAStore, list_to_table_name
-from kittystore.sa.kittysamodel import get_class_object
-from sqlalchemy.exc import ProgrammingError
+from storm.exceptions import IntegrityError
+from kittystore.storm import get_storm_store
+from kittystore.storm.model import Email
from kittystore.test import get_test_file
class TestSAStore(unittest.TestCase):
def setUp(self):
- self.store = KittySAStore("sqlite:///:memory:")
+ self.store = get_storm_store("sqlite:")
- def tearDown(self):
- self.store.session.close()
+ #def tearDown(self):
+ # self.store.close()
+
+ def test_no_message_id(self):
+ msg = email.message.Message()
+ self.assertRaises(ValueError, self.store.add_to_list, "example-list", msg)
+
+ def test_no_date(self):
+ msg = email.message.Message()
+ msg["From"] = "dummy@example.com"
+ msg["Message-ID"] = "<dummy>"
+ msg.set_payload("Dummy message")
+ now = datetime.datetime.now()
+ try:
+ self.store.add_to_list("example-list", msg)
+ except IntegrityError, e:
+ self.fail(e)
+ stored_msg = self.store.db.find(Email).one()
+ self.assertTrue(stored_msg.date >= now)
#def test_non_ascii_payload(self):
# """add_to_list must handle non-ascii messages"""
diff --git a/kittystore/test/test_utils.py b/kittystore/test/test_utils.py
index 6b2eafa..46d7e72 100644
--- a/kittystore/test/test_utils.py
+++ b/kittystore/test/test_utils.py
@@ -2,6 +2,8 @@
import unittest
import email
+import datetime
+import dateutil
from mock import Mock
import kittystore.utils
@@ -34,7 +36,7 @@ class TestUtils(unittest.TestCase):
with open(get_test_file("payload-%s.txt" % enc)) as email_file:
msg = email.message_from_file(email_file)
payload = kittystore.utils.payload_to_unicode(msg)
- print enc, repr(payload)
+ #print enc, repr(payload)
self.assertTrue(isinstance(payload, unicode))
self.assertEqual(payload, u'This message contains non-ascii '
u'characters:\n\xe9 \xe8 \xe7 \xe0 \xee \xef \xeb \u20ac\n')
@@ -55,3 +57,28 @@ class TestUtils(unittest.TestCase):
h_out = kittystore.utils.header_to_unicode(h_in)
self.assertEqual(h_out, h_expected)
self.assertTrue(isinstance(h_out, unicode))
+
+ def test_wrong_datestring(self):
+ datestring = "Fri, 5 Dec 2003 11:41 +0000 (GMT Standard Time)"
+ parsed = kittystore.utils.parsedate(datestring)
+ self.assertEqual(parsed, None)
+
+ def test_very_large_timezone(self):
+ """
+ Timezone displacements must not be greater than 14 hours
+ Or PostgreSQL won't accept them.
+ """
+ datestrings = ["Wed, 1 Nov 2006 23:50:26 +1800",
+ "Wed, 1 Nov 2006 23:50:26 -1800"]
+ for datestring in datestrings:
+ parsed = kittystore.utils.parsedate(datestring)
+ self.assertEqual(parsed, dateutil.parser.parse(datestring))
+ self.assertTrue(parsed.utcoffset() <= datetime.timedelta(hours=13),
+ "UTC offset %s for datetime %s is too large"
+ % (parsed.utcoffset(), parsed))
+
+ def test_unknown_encoding(self):
+ """Unknown encodings should just replace unknown characters"""
+ header = "=?x-gbk?Q?Frank_B=A8=B9ttner?="
+ decoded = kittystore.utils.header_to_unicode(header)
+ self.assertEqual(decoded, u'Frank B\ufffd\ufffdttner')
diff --git a/kittystore/test/testdata/wrong-in-reply-to-header.txt b/kittystore/test/testdata/wrong-in-reply-to-header.txt
new file mode 100644
index 0000000..ce8a424
--- /dev/null
+++ b/kittystore/test/testdata/wrong-in-reply-to-header.txt
@@ -0,0 +1,10 @@
+From test at example.com Fri Apr 6 22:43:55 2007
+From: test at example.com (Dummy Person)
+Date: Fri, 6 Apr 2007 15:43:55 -0700 (PDT)
+Subject: Dummy subject
+In-Reply-To: <200704070053.46646.other.person@example
+Message-ID: <20070406224355.899B9180064@test.example.com>
+
+> Other person's message
+
+Dummy person's reply
diff --git a/kittystore/utils.py b/kittystore/utils.py
index 9ed0810..d5d84a7 100644
--- a/kittystore/utils.py
+++ b/kittystore/utils.py
@@ -18,11 +18,11 @@ import email.utils
import time
import re
from email.header import decode_header
-from datetime import datetime, tzinfo
+from datetime import datetime, tzinfo, timedelta
from base64 import b32encode
from hashlib import sha1
-import dateutil.parser
+import dateutil.parser, dateutil.tz
__all__ = ("get_message_id_hash", "parseaddr", "parsedate",
@@ -67,7 +67,11 @@ def header_to_unicode(header):
if h_decoded:
# not so sure why...
h_decoded.append(" ")
- h_decoded.append(decoded.decode(charset))
+ try:
+ h_decoded.append(decoded.decode(charset))
+ except LookupError:
+ # Unknown encoding
+ h_decoded.append(decoded.decode("ascii", "replace"))
return "".join(h_decoded)
def payload_to_unicode(message):
@@ -98,7 +102,13 @@ def payload_to_unicode(message):
def parsedate(datestring):
if datestring is None:
return None
- return dateutil.parser.parse(datestring)
+ try:
+ parsed = dateutil.parser.parse(datestring)
+ except ValueError:
+ return None
+ if abs(parsed.utcoffset()) > timedelta(hours=13):
+ parsed = parsed.astimezone(dateutil.tz.tzutc())
+ return parsed
#date_tuple = email.utils.parsedate_tz(datestring)
#timestamp = email.utils.mktime_tz(date_tuple)
#return datetime.fromtimestamp(timestamp)