diff options
author | Aurélien Bompard <aurelien@bompard.org> | 2012-08-20 18:36:46 +0200 |
---|---|---|
committer | Aurélien Bompard <aurelien@bompard.org> | 2012-09-07 10:40:54 +0200 |
commit | b5c5a81d184bf47018b7360ad835217da5b8e0ee (patch) | |
tree | 1629218ae285780f2af3d30394837eff3aab84f2 | |
parent | b76a9457c8d3f2852a6b44b1d353c31cde74e1fd (diff) | |
download | kittystore-b5c5a81d184bf47018b7360ad835217da5b8e0ee.tar.gz kittystore-b5c5a81d184bf47018b7360ad835217da5b8e0ee.tar.xz kittystore-b5c5a81d184bf47018b7360ad835217da5b8e0ee.zip |
Handle non-ascii subject
-rw-r--r-- | kittystore/sa/store.py | 5 | ||||
-rw-r--r-- | kittystore/test/test_sa_store.py | 6 | ||||
-rw-r--r-- | kittystore/test/testdata/non-ascii-headers.txt | 6 | ||||
-rw-r--r-- | kittystore/utils.py | 19 | ||||
-rw-r--r-- | to_sqldb.py | 6 |
5 files changed, 29 insertions, 13 deletions
diff --git a/kittystore/sa/store.py b/kittystore/sa/store.py index 098d2e5..e4a417f 100644 --- a/kittystore/sa/store.py +++ b/kittystore/sa/store.py @@ -19,7 +19,7 @@ import datetime from kittystore import MessageNotFound from kittystore.utils import get_message_id_hash, parseaddr, parsedate -from kittystore.utils import get_ref_and_thread_id +from kittystore.utils import get_ref_and_thread_id, header_to_unicode from kittystore.sa.kittysamodel import get_class_object from zope.interface import implements @@ -122,6 +122,7 @@ class KittySAStore(object): thread_id = msg_id_hash from_name, from_email = parseaddr(message['From']) + from_name = header_to_unicode(from_name) # Turn non-ascii into Unicode, assuming UTF-8 for part in message.walk(): @@ -141,7 +142,7 @@ class KittySAStore(object): mail = email( sender=from_name, email=from_email, - subject=message.get('Subject'), + subject=header_to_unicode(message.get('Subject')), content=message.get_payload(), date=parsedate(message.get("Date")), message_id=msg_id, diff --git a/kittystore/test/test_sa_store.py b/kittystore/test/test_sa_store.py index 7e06361..289b658 100644 --- a/kittystore/test/test_sa_store.py +++ b/kittystore/test/test_sa_store.py @@ -37,6 +37,8 @@ class TestSAStore(unittest.TestCase): email = get_class_object(list_to_table_name("example-list"), 'email', self.store.metadata) for msg in self.store.session.query(email).all(): - print repr(msg.sender) + print repr(msg.sender), repr(msg.subject) self.failIf("=?" in msg.sender, - "header not decoded: %s" % msg.sender) + "From header not decoded: %s" % msg.sender) + self.failIf("=?" in msg.subject, + "Subject header not decoded: %s" % msg.sender) diff --git a/kittystore/test/testdata/non-ascii-headers.txt b/kittystore/test/testdata/non-ascii-headers.txt index 07f2941..dc6b655 100644 --- a/kittystore/test/testdata/non-ascii-headers.txt +++ b/kittystore/test/testdata/non-ascii-headers.txt @@ -69,7 +69,8 @@ Vit From bjorn at xn--rombobjrn-67a.se Thu Jul 12 23:49:23 2012 From: bjorn at xn--rombobjrn-67a.se (=?iso-8859-1?q?Bj=F6rn_Persson?=) Date: Fri, 13 Jul 2012 01:49:23 +0200 -Subject: [Fedora-packaging] Script-Tools, which location? +Subject: [Fedora-fr-list] =?iso-8859-1?q?Compte-rendu_de_la_r=E9union_du_?= + =?iso-8859-1?q?1_novembre_2009?= In-Reply-To: <CACyNwR1VLMm019JTnju9gbXVq4=p2y+=NP8NuOh1XShtRVG+Nw@mail.gmail.com> References: <CACyNwR1VLMm019JTnju9gbXVq4=p2y+=NP8NuOh1XShtRVG+Nw@mail.gmail.com> Message-ID: <201207130149.23771.bjorn@xn--rombobjrn-67a.se> @@ -122,7 +123,8 @@ URL: <http://lists.fedoraproject.org/pipermail/packaging/attachments/20120713/23 From mmaslano at redhat.com Wed Jul 18 07:41:37 2012 From: mmaslano at redhat.com (=?UTF-8?B?TWFyY2VsYSBNYcWhbMOhxYhvdsOh?=) Date: Wed, 18 Jul 2012 09:41:37 +0200 -Subject: [Fedora-packaging] RPM macros +Subject: =?UTF-8?Q?Re=3A_=5BFedora=2Dfr=2Dlist=5D_Compte=2Drendu_de_la_r=C3=A9union_du_?= + =?UTF-8?Q?1_novembre_2009?= In-Reply-To: <4FFC1228.3060409@redhat.com> References: <4FFC1228.3060409@redhat.com> Message-ID: <500668B1.8090904@redhat.com> diff --git a/kittystore/utils.py b/kittystore/utils.py index 73d0efe..4703c40 100644 --- a/kittystore/utils.py +++ b/kittystore/utils.py @@ -26,7 +26,7 @@ import dateutil.parser __all__ = ("get_message_id_hash", "parseaddr", "parsedate", - "get_ref_and_thread_id", + "header_to_unicode", "get_ref_and_thread_id", ) @@ -55,14 +55,19 @@ def parseaddr(address): """ address = address.replace(" at ", "@") from_name, from_email = email.utils.parseaddr(address) - from_decoded = [] - for decoded, charset in decode_header(from_name): + return from_name, from_email + +def header_to_unicode(header): + h_decoded = [] + for decoded, charset in decode_header(header): if charset is None: - from_decoded.append(unicode(decoded)) + h_decoded.append(unicode(decoded)) else: - from_decoded.append(decoded.decode(charset)) - from_name = "".join(from_decoded) - return from_name, from_email + if h_decoded: + # not so sure why... + h_decoded.append(" ") + h_decoded.append(decoded.decode(charset)) + return "".join(h_decoded) def parsedate(datestring): if datestring is None: diff --git a/to_sqldb.py b/to_sqldb.py index 4afa8af..2f709bb 100644 --- a/to_sqldb.py +++ b/to_sqldb.py @@ -13,6 +13,7 @@ from dateutil.parser import parse from dateutil import tz from kitchen.text.converters import to_bytes from hashlib import sha1 +from sqlalchemy.exc import OperationalError from kittystore import get_store @@ -50,6 +51,11 @@ def to_db(mbfile, list_name, store): print "%s from %s about %s" % (e.args[0], e.args[1].get("From"), e.args[1].get("Subject")) continue + except OperationalError, e: + print message["From"], message["Subject"], e + # Database is locked + time.sleep(1) + msg_id_hash = store.add_to_list(list_name, message) store.session.flush() cnt = cnt + 1 store.session.commit() |