summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Aurélien Bompard <aurelien@bompard.org>, 2012-08-20 18:36:46 +0200
committer: Aurélien Bompard <aurelien@bompard.org>, 2012-09-07 10:40:54 +0200
commit: b5c5a81d184bf47018b7360ad835217da5b8e0ee (patch)
tree: 1629218ae285780f2af3d30394837eff3aab84f2
parent: b76a9457c8d3f2852a6b44b1d353c31cde74e1fd (diff)
download: kittystore-b5c5a81d184bf47018b7360ad835217da5b8e0ee.tar.gz
          kittystore-b5c5a81d184bf47018b7360ad835217da5b8e0ee.tar.xz
          kittystore-b5c5a81d184bf47018b7360ad835217da5b8e0ee.zip
Handle non-ascii subject
-rw-r--r--  kittystore/sa/store.py                          |  5
-rw-r--r--  kittystore/test/test_sa_store.py                |  6
-rw-r--r--  kittystore/test/testdata/non-ascii-headers.txt  |  6
-rw-r--r--  kittystore/utils.py                             | 19
-rw-r--r--  to_sqldb.py                                     |  6
5 files changed, 29 insertions, 13 deletions
diff --git a/kittystore/sa/store.py b/kittystore/sa/store.py
index 098d2e5..e4a417f 100644
--- a/kittystore/sa/store.py
+++ b/kittystore/sa/store.py
@@ -19,7 +19,7 @@ import datetime
from kittystore import MessageNotFound
from kittystore.utils import get_message_id_hash, parseaddr, parsedate
-from kittystore.utils import get_ref_and_thread_id
+from kittystore.utils import get_ref_and_thread_id, header_to_unicode
from kittystore.sa.kittysamodel import get_class_object
from zope.interface import implements
@@ -122,6 +122,7 @@ class KittySAStore(object):
thread_id = msg_id_hash
from_name, from_email = parseaddr(message['From'])
+ from_name = header_to_unicode(from_name)
# Turn non-ascii into Unicode, assuming UTF-8
for part in message.walk():
@@ -141,7 +142,7 @@ class KittySAStore(object):
mail = email(
sender=from_name,
email=from_email,
- subject=message.get('Subject'),
+ subject=header_to_unicode(message.get('Subject')),
content=message.get_payload(),
date=parsedate(message.get("Date")),
message_id=msg_id,
diff --git a/kittystore/test/test_sa_store.py b/kittystore/test/test_sa_store.py
index 7e06361..289b658 100644
--- a/kittystore/test/test_sa_store.py
+++ b/kittystore/test/test_sa_store.py
@@ -37,6 +37,8 @@ class TestSAStore(unittest.TestCase):
email = get_class_object(list_to_table_name("example-list"), 'email',
self.store.metadata)
for msg in self.store.session.query(email).all():
- print repr(msg.sender)
+ print repr(msg.sender), repr(msg.subject)
self.failIf("=?" in msg.sender,
- "header not decoded: %s" % msg.sender)
+ "From header not decoded: %s" % msg.sender)
+ self.failIf("=?" in msg.subject,
+ "Subject header not decoded: %s" % msg.sender)
diff --git a/kittystore/test/testdata/non-ascii-headers.txt b/kittystore/test/testdata/non-ascii-headers.txt
index 07f2941..dc6b655 100644
--- a/kittystore/test/testdata/non-ascii-headers.txt
+++ b/kittystore/test/testdata/non-ascii-headers.txt
@@ -69,7 +69,8 @@ Vit
From bjorn at xn--rombobjrn-67a.se Thu Jul 12 23:49:23 2012
From: bjorn at xn--rombobjrn-67a.se (=?iso-8859-1?q?Bj=F6rn_Persson?=)
Date: Fri, 13 Jul 2012 01:49:23 +0200
-Subject: [Fedora-packaging] Script-Tools, which location?
+Subject: [Fedora-fr-list] =?iso-8859-1?q?Compte-rendu_de_la_r=E9union_du_?=
+ =?iso-8859-1?q?1_novembre_2009?=
In-Reply-To: <CACyNwR1VLMm019JTnju9gbXVq4=p2y+=NP8NuOh1XShtRVG+Nw@mail.gmail.com>
References: <CACyNwR1VLMm019JTnju9gbXVq4=p2y+=NP8NuOh1XShtRVG+Nw@mail.gmail.com>
Message-ID: <201207130149.23771.bjorn@xn--rombobjrn-67a.se>
@@ -122,7 +123,8 @@ URL: <http://lists.fedoraproject.org/pipermail/packaging/attachments/20120713/23
From mmaslano at redhat.com Wed Jul 18 07:41:37 2012
From: mmaslano at redhat.com (=?UTF-8?B?TWFyY2VsYSBNYcWhbMOhxYhvdsOh?=)
Date: Wed, 18 Jul 2012 09:41:37 +0200
-Subject: [Fedora-packaging] RPM macros
+Subject: =?UTF-8?Q?Re=3A_=5BFedora=2Dfr=2Dlist=5D_Compte=2Drendu_de_la_r=C3=A9union_du_?=
+ =?UTF-8?Q?1_novembre_2009?=
In-Reply-To: <4FFC1228.3060409@redhat.com>
References: <4FFC1228.3060409@redhat.com>
Message-ID: <500668B1.8090904@redhat.com>
diff --git a/kittystore/utils.py b/kittystore/utils.py
index 73d0efe..4703c40 100644
--- a/kittystore/utils.py
+++ b/kittystore/utils.py
@@ -26,7 +26,7 @@ import dateutil.parser
__all__ = ("get_message_id_hash", "parseaddr", "parsedate",
- "get_ref_and_thread_id",
+ "header_to_unicode", "get_ref_and_thread_id",
)
@@ -55,14 +55,19 @@ def parseaddr(address):
"""
address = address.replace(" at ", "@")
from_name, from_email = email.utils.parseaddr(address)
- from_decoded = []
- for decoded, charset in decode_header(from_name):
+ return from_name, from_email
+
+def header_to_unicode(header):
+ h_decoded = []
+ for decoded, charset in decode_header(header):
if charset is None:
- from_decoded.append(unicode(decoded))
+ h_decoded.append(unicode(decoded))
else:
- from_decoded.append(decoded.decode(charset))
- from_name = "".join(from_decoded)
- return from_name, from_email
+ if h_decoded:
+ # not so sure why...
+ h_decoded.append(" ")
+ h_decoded.append(decoded.decode(charset))
+ return "".join(h_decoded)
def parsedate(datestring):
if datestring is None:
diff --git a/to_sqldb.py b/to_sqldb.py
index 4afa8af..2f709bb 100644
--- a/to_sqldb.py
+++ b/to_sqldb.py
@@ -13,6 +13,7 @@ from dateutil.parser import parse
from dateutil import tz
from kitchen.text.converters import to_bytes
from hashlib import sha1
+from sqlalchemy.exc import OperationalError
from kittystore import get_store
@@ -50,6 +51,11 @@ def to_db(mbfile, list_name, store):
print "%s from %s about %s" % (e.args[0],
e.args[1].get("From"), e.args[1].get("Subject"))
continue
+ except OperationalError, e:
+ print message["From"], message["Subject"], e
+ # Database is locked
+ time.sleep(1)
+ msg_id_hash = store.add_to_list(list_name, message)
store.session.flush()
cnt = cnt + 1
store.session.commit()