diff options
author | Aurélien Bompard <aurelien@bompard.org> | 2012-09-10 17:13:13 +0200 |
---|---|---|
committer | Aurélien Bompard <aurelien@bompard.org> | 2012-09-10 17:18:21 +0200 |
commit | 24be4756ce076063a6a28fefe607ba72b10357c5 (patch) | |
tree | dcbdab22aa93f546d28d33d4a08204f1d23cad4f | |
parent | 7fbabe36ce3e80de885484e66adcc42827d8c5f2 (diff) | |
download | kittystore-24be4756ce076063a6a28fefe607ba72b10357c5.tar.gz kittystore-24be4756ce076063a6a28fefe607ba72b10357c5.tar.xz kittystore-24be4756ce076063a6a28fefe607ba72b10357c5.zip |
Improve date and header parsing
-rw-r--r-- | kittystore/storm/store.py | 3 | ||||
-rw-r--r-- | kittystore/test/test_storm_store.py (renamed from kittystore/test/test_sa_store.py) | 30 | ||||
-rw-r--r-- | kittystore/test/test_utils.py | 29 | ||||
-rw-r--r-- | kittystore/test/testdata/wrong-in-reply-to-header.txt | 10 | ||||
-rw-r--r-- | kittystore/utils.py | 18 |
5 files changed, 79 insertions, 11 deletions
diff --git a/kittystore/storm/store.py b/kittystore/storm/store.py index b35843c..0c5b91e 100644 --- a/kittystore/storm/store.py +++ b/kittystore/storm/store.py @@ -107,6 +107,9 @@ class StormStore(object): payload = payload_to_unicode(message) email.content = payload email.date = parsedate(message.get("Date")) + if email.date is None: + # Absent or unparseable date + email.date = datetime.datetime.now() email.full = message.as_string() #category = 'Question' # TODO: enum + i18n ? diff --git a/kittystore/test/test_sa_store.py b/kittystore/test/test_storm_store.py index 69680ae..b917afc 100644 --- a/kittystore/test/test_sa_store.py +++ b/kittystore/test/test_storm_store.py @@ -3,19 +3,37 @@ import unittest import email import mailbox +import datetime -from kittystore.sa.store import KittySAStore, list_to_table_name -from kittystore.sa.kittysamodel import get_class_object -from sqlalchemy.exc import ProgrammingError +from storm.exceptions import IntegrityError +from kittystore.storm import get_storm_store +from kittystore.storm.model import Email from kittystore.test import get_test_file class TestSAStore(unittest.TestCase): def setUp(self): - self.store = KittySAStore("sqlite:///:memory:") + self.store = get_storm_store("sqlite:") - def tearDown(self): - self.store.session.close() + #def tearDown(self): + # self.store.close() + + def test_no_message_id(self): + msg = email.message.Message() + self.assertRaises(ValueError, self.store.add_to_list, "example-list", msg) + + def test_no_date(self): + msg = email.message.Message() + msg["From"] = "dummy@example.com" + msg["Message-ID"] = "<dummy>" + msg.set_payload("Dummy message") + now = datetime.datetime.now() + try: + self.store.add_to_list("example-list", msg) + except IntegrityError, e: + self.fail(e) + stored_msg = self.store.db.find(Email).one() + self.assertTrue(stored_msg.date >= now) #def test_non_ascii_payload(self): # """add_to_list must handle non-ascii messages""" diff --git a/kittystore/test/test_utils.py b/kittystore/test/test_utils.py index 6b2eafa..46d7e72 100644 --- a/kittystore/test/test_utils.py +++ b/kittystore/test/test_utils.py @@ -2,6 +2,8 @@ import unittest import email +import datetime +import dateutil from mock import Mock import kittystore.utils @@ -34,7 +36,7 @@ class TestUtils(unittest.TestCase): with open(get_test_file("payload-%s.txt" % enc)) as email_file: msg = email.message_from_file(email_file) payload = kittystore.utils.payload_to_unicode(msg) - print enc, repr(payload) + #print enc, repr(payload) self.assertTrue(isinstance(payload, unicode)) self.assertEqual(payload, u'This message contains non-ascii ' u'characters:\n\xe9 \xe8 \xe7 \xe0 \xee \xef \xeb \u20ac\n') @@ -55,3 +57,28 @@ class TestUtils(unittest.TestCase): h_out = kittystore.utils.header_to_unicode(h_in) self.assertEqual(h_out, h_expected) self.assertTrue(isinstance(h_out, unicode)) + + def test_wrong_datestring(self): + datestring = "Fri, 5 Dec 2003 11:41 +0000 (GMT Standard Time)" + parsed = kittystore.utils.parsedate(datestring) + self.assertEqual(parsed, None) + + def test_very_large_timezone(self): + """ + Timezone displacements must not be greater than 14 hours + Or PostgreSQL won't accept them. + """ + datestrings = ["Wed, 1 Nov 2006 23:50:26 +1800", + "Wed, 1 Nov 2006 23:50:26 -1800"] + for datestring in datestrings: + parsed = kittystore.utils.parsedate(datestring) + self.assertEqual(parsed, dateutil.parser.parse(datestring)) + self.assertTrue(parsed.utcoffset() <= datetime.timedelta(hours=13), + "UTC offset %s for datetime %s is too large" + % (parsed.utcoffset(), parsed)) + + def test_unknown_encoding(self): + """Unknown encodings should just replace unknown characters""" + header = "=?x-gbk?Q?Frank_B=A8=B9ttner?=" + decoded = kittystore.utils.header_to_unicode(header) + self.assertEqual(decoded, u'Frank B\ufffd\ufffdttner') diff --git a/kittystore/test/testdata/wrong-in-reply-to-header.txt b/kittystore/test/testdata/wrong-in-reply-to-header.txt new file mode 100644 index 0000000..ce8a424 --- /dev/null +++ b/kittystore/test/testdata/wrong-in-reply-to-header.txt @@ -0,0 +1,10 @@ +From test at example.com Fri Apr 6 22:43:55 2007 +From: test at example.com (Dummy Person) +Date: Fri, 6 Apr 2007 15:43:55 -0700 (PDT) +Subject: Dummy subject +In-Reply-To: <200704070053.46646.other.person@example +Message-ID: <20070406224355.899B9180064@test.example.com> + +> Other person's message + +Dummy person's reply diff --git a/kittystore/utils.py b/kittystore/utils.py index 9ed0810..d5d84a7 100644 --- a/kittystore/utils.py +++ b/kittystore/utils.py @@ -18,11 +18,11 @@ import email.utils import time import re from email.header import decode_header -from datetime import datetime, tzinfo +from datetime import datetime, tzinfo, timedelta from base64 import b32encode from hashlib import sha1 -import dateutil.parser +import dateutil.parser, dateutil.tz __all__ = ("get_message_id_hash", "parseaddr", "parsedate", @@ -67,7 +67,11 @@ def header_to_unicode(header): if h_decoded: # not so sure why... h_decoded.append(" ") - h_decoded.append(decoded.decode(charset)) + try: + h_decoded.append(decoded.decode(charset)) + except LookupError: + # Unknown encoding + h_decoded.append(decoded.decode("ascii", "replace")) return "".join(h_decoded) def payload_to_unicode(message): @@ -98,7 +102,13 @@ def payload_to_unicode(message): def parsedate(datestring): if datestring is None: return None - return dateutil.parser.parse(datestring) + try: + parsed = dateutil.parser.parse(datestring) + except ValueError: + return None + if abs(parsed.utcoffset()) > timedelta(hours=13): + parsed = parsed.astimezone(dateutil.tz.tzutc()) + return parsed #date_tuple = email.utils.parsedate_tz(datestring) #timestamp = email.utils.mktime_tz(date_tuple) #return datetime.fromtimestamp(timestamp) |