summaryrefslogtreecommitdiffstats
path: root/kittystore
diff options
context:
space:
mode:
author: Aslak Knutsen <aslak@redhat.com> 2013-03-14 23:40:03 +0100
committer: Aslak Knutsen <aslak@redhat.com> 2013-03-20 11:21:38 +0100
commit: ba422e76d4889dce2effcfcb9455ac8080024a54 (patch)
tree: 482b0c340f3dc7ce7048e87004a7f965418bb83c /kittystore
parent: 8133a3fa2cb2dff9478f14e0980b0176eb95d996 (diff)
downloadkittystore-ba422e76d4889dce2effcfcb9455ac8080024a54.tar.gz
kittystore-ba422e76d4889dce2effcfcb9455ac8080024a54.tar.xz
kittystore-ba422e76d4889dce2effcfcb9455ac8080024a54.zip
Clean up pipermail attachments from email.content
When loading an archive from pipermail, each attachment is described in the email body by a "-- next part --" section. This is noise for the reader and should not be part of the email.content displayed to the user. The complete original message can still be seen in the raw view.
Diffstat (limited to 'kittystore')
-rw-r--r-- kittystore/scrub.py | 11
-rw-r--r-- kittystore/test/test_scrub.py | 7
-rw-r--r-- kittystore/test/testdata/pipermail_nextpart.txt | 30
3 files changed, 45 insertions, 3 deletions
diff --git a/kittystore/scrub.py b/kittystore/scrub.py
index 729f0ba..2a54954 100644
--- a/kittystore/scrub.py
+++ b/kittystore/scrub.py
@@ -37,6 +37,8 @@ dre = re.compile(r'^\.*')
BR = '<br>\n'
+NEXT_PART = re.compile(r'--------------[ ]next[ ]part[ ]--------------\n')
+
def guess_extension(ctype, ext):
# mimetypes maps multiple extensions to the same type, e.g. .doc, .dot,
@@ -207,13 +209,16 @@ class Scrubber(object):
if not t.endswith('\n'):
t += '\n'
text.append(t)
- # Now join the text and set the payload
- #sep = _('-------------- next part --------------\n')
- #text = sep.join(text)
+
text = "\n".join(text)
else:
text = self.msg.get_payload(decode=True)
text = text.decode(get_charset(self.msg, guess=True), "replace")
+
+ next_part_match = NEXT_PART.search(text)
+ if next_part_match:
+ text = text[0:next_part_match.start(0)]
+
return (text, attachments)
diff --git a/kittystore/test/test_scrub.py b/kittystore/test/test_scrub.py
index 2e30cd7..3aef0d2 100644
--- a/kittystore/test/test_scrub.py
+++ b/kittystore/test/test_scrub.py
@@ -129,3 +129,10 @@ class TestScrubber(unittest.TestCase):
u'accented letters : \xe9 \xe8 \xe7 \xe0.\r\nAnd an '
u'attachment with an accented filename\r\n\r\n\r\n\r\n')
+ def test_remove_next_part_from_content(self):
+ with open(get_test_file("pipermail_nextpart.txt")) as email_file:
+ msg = email.message_from_file(email_file, _class=Message)
+ scrubber = Scrubber("testlist@example.com", msg)
+ contents, attachments = scrubber.scrub()
+
+ self.failIf("-------------- next part --------------" in contents)
diff --git a/kittystore/test/testdata/pipermail_nextpart.txt b/kittystore/test/testdata/pipermail_nextpart.txt
new file mode 100644
index 0000000..a63c65d
--- /dev/null
+++ b/kittystore/test/testdata/pipermail_nextpart.txt
@@ -0,0 +1,30 @@
+From vondruch at redhat.com Tue Jul 10 11:29:44 2012
+From: vondruch at redhat.com (=?ISO-8859-2?Q?V=EDt_Ondruch?=)
+Date: Tue, 10 Jul 2012 13:29:44 +0200
+Subject: [Fedora-packaging] RPM macros
+Message-ID: <4FFC1228.3060409@redhat.com>
+
+Hi,
+
+I noticed that in revised haskell guidelines [1], there is mentioned the
+ghc-rpm-macros package, which provides macros.ghc file, which in turns
+provides some useful macros for packaging of Haskell packages. In Ruby,
+we provide similar macro files in ruby-devel and rubygems-devel
+subpackages respectively. Perl has their macros directly in the rpm
+package itself.
+
+This seems to be a bit inconsistent to me. So my question is: shouldn't
+we standardize some best practices with regards of RPM macros? For
+example for Ruby, we placed the macros into -devel subpackages, because
+we believe that it is just development dependency. Any opinions?
+
+
+Vit
+
+-------------- next part --------------
+A non-text attachment was scrubbed...
+Name: signature.asc
+Type: application/pgp-signature
+Size: 190 bytes
+Desc: This is a digitally signed message part.
+URL: <http://lists.fedoraproject.org/pipermail/packaging/attachments/20120713/2377d1ee/attachment.sig>