summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Aurélien Bompard <aurelien@bompard.org> 2012-10-01 12:28:47 +0200
committer: Aurélien Bompard <aurelien@bompard.org> 2012-10-01 12:28:47 +0200
commit: e012672451eda6293fd6817036d4dea907f63a4c (patch)
tree: 0fbc36853a6a3051fde7172aa535303437e95fb2
parent: 986ed148c065ae626f6f82f2d93a9b2e346a6e3c (diff)
download: kittystore-e012672451eda6293fd6817036d4dea907f63a4c.tar.gz
kittystore-e012672451eda6293fd6817036d4dea907f63a4c.tar.xz
kittystore-e012672451eda6293fd6817036d4dea907f63a4c.zip
Improve scrubbing
- don't escape HTML contents, it's the frontend's job
- don't insert placeholder text when scrubbing attachments
-rw-r--r-- kittystore/scrub.py 44
-rw-r--r-- kittystore/test/test_scrub.py 29
2 files changed, 31 insertions, 42 deletions
diff --git a/kittystore/scrub.py b/kittystore/scrub.py
index 0b11963..04b30e5 100644
--- a/kittystore/scrub.py
+++ b/kittystore/scrub.py
@@ -128,21 +128,22 @@ class Scrubber(object):
#"""), lcset)
# else:
if sanitize == 1:
- # HTML-escape it and store it as an attachment, but make it
- # look a /little/ bit prettier. :(
- payload = websafe(part.get_payload(decode=True))
- # For whitespace in the margin, change spaces into
- # non-breaking spaces, and tabs into 8 of those. Then use a
- # mono-space font. Still looks hideous to me, but then I'd
- # just as soon discard them.
- def doreplace(s):
- return s.expandtabs(8).replace(' ', '&nbsp;')
- lines = [doreplace(s) for s in payload.split('\n')]
- payload = '<tt>\n' + BR.join(lines) + '\n</tt>\n'
- part.set_payload(payload)
- # We're replacing the payload with the decoded payload so this
- # will just get in the way.
- del part['content-transfer-encoding']
+ # Don't HTML-escape it, this is the frontend's job
+ ## HTML-escape it and store it as an attachment, but make it
+ ## look a /little/ bit prettier. :(
+ #payload = websafe(part.get_payload(decode=True))
+ ## For whitespace in the margin, change spaces into
+ ## non-breaking spaces, and tabs into 8 of those. Then use a
+ ## mono-space font. Still looks hideous to me, but then I'd
+ ## just as soon discard them.
+ #def doreplace(s):
+ # return s.expandtabs(8).replace(' ', '&nbsp;')
+ #lines = [doreplace(s) for s in payload.split('\n')]
+ #payload = '<tt>\n' + BR.join(lines) + '\n</tt>\n'
+ #part.set_payload(payload)
+ ## We're replacing the payload with the decoded payload so this
+ ## will just get in the way.
+ #del part['content-transfer-encoding']
self.save_attachment(part, part_num, filter_html=False)
part.set_payload('')
elif ctype == 'message/rfc822':
@@ -188,7 +189,7 @@ class Scrubber(object):
partctype = part.get_content_type()
if partctype <> 'text/plain' and (partctype <> 'text/html' or
sanitize <> 2):
- text.append(_('Skipped content of type %(partctype)s\n'))
+ #text.append(_('Skipped content of type %(partctype)s\n'))
continue
try:
t = part.get_payload(decode=True) or ''
@@ -222,8 +223,9 @@ class Scrubber(object):
t += '\n'
text.append(t)
# Now join the text and set the payload
- sep = _('-------------- next part --------------\n')
- text = sep.join(text)
+ #sep = _('-------------- next part --------------\n')
+ #text = sep.join(text)
+ text = "\n".join(text)
return text
@@ -281,8 +283,10 @@ class Scrubber(object):
# TODO: bring back the HTML sanitizer feature
if ctype == 'message/rfc822':
submsg = part.get_payload()
- # BAW: I'm sure we can eventually do better than this. :(
- decodedpayload = websafe(str(submsg))
+ # Don't HTML-escape it, this is the frontend's job
+ ## BAW: I'm sure we can eventually do better than this. :(
+ #decodedpayload = websafe(str(submsg))
+ decodedpayload = str(submsg)
msg_id = self.msg['Message-Id'].strip("<>")
self.store.add_attachment(
self.mlist, msg_id, counter, filebase+ext,
diff --git a/kittystore/test/test_scrub.py b/kittystore/test/test_scrub.py
index d590709..8637e97 100644
--- a/kittystore/test/test_scrub.py
+++ b/kittystore/test/test_scrub.py
@@ -25,13 +25,9 @@ class TestScrubber(unittest.TestCase):
'end:vcard\r\n\r\n')
self.assertEqual(contents,
"This is a test message.\r\n\r\n"
- "-------------- next part --------------\n"
- "Skipped content of type %(partctype)s\n"
- "-------------- next part --------------\n"
- "-- \ndevel mailing list\ndevel@lists.fedoraproject.org\n"
+ "\n-- \ndevel mailing list\ndevel@lists.fedoraproject.org\n"
"https://admin.fedoraproject.org/mailman/listinfo/devel\n"
)
- self.fail() # Fix the expected text above
def test_attachment_2(self):
with open(get_test_file("attachment-2.txt")) as email_file:
@@ -49,14 +45,10 @@ class TestScrubber(unittest.TestCase):
'z394AnmMnQCcC+6tWcqE1dPQmIdRbLXgKGVp\r\nEeUAn2OqtaXaXaQV7rx+'
'SmOldmSzcFw4\r\n=OEJv\r\n-----END PGP SIGNATURE-----\r\n')
self.assertEqual(contents,
- u"This is a test message\r\nNon-ascii chars: Hofm\xfchlgasse"
- u"\r\n-------------- next part --------------\n"
- u"Skipped content of type %(partctype)s\n"
- u"-------------- next part --------------\n"
- u"-- \ndevel mailing list\ndevel@lists.fedoraproject.org\n"
+ u"This is a test message\r\nNon-ascii chars: Hofm\xfchlgasse\r\n"
+ u"\n-- \ndevel mailing list\ndevel@lists.fedoraproject.org\n"
u"https://admin.fedoraproject.org/mailman/listinfo/devel\n"
)
- self.fail() # Fix the expected text above
def test_attachment_3(self):
with open(get_test_file("attachment-3.txt")) as email_file:
@@ -70,19 +62,14 @@ class TestScrubber(unittest.TestCase):
self.assertEqual(args_1[0][0:5], ("testlist@example.com",
"CACec3Lup8apbhUMcm_Ktn1dPxx4eWr2y1RV7ZSYhy0tzmjSrgQ@mail.gmail.com",
3, "attachment.html", "text/html"))
- self.assertEqual(len(args_1[0][5]), 5812)
+ self.assertEqual(len(args_1[0][5]), 3134)
# Image attachment
self.assertEqual(args_2[0][0:5], ("testlist@example.com",
"CACec3Lup8apbhUMcm_Ktn1dPxx4eWr2y1RV7ZSYhy0tzmjSrgQ@mail.gmail.com",
4, "GeoffreyRoucourt.jpg", "image/jpeg"))
self.assertEqual(len(args_2[0][5]), 282180)
# Scrubbed content
- self.assertEqual(contents,
- u"This is a test message\r\n"
- u"-------------- next part --------------\n"
- u"Skipped content of type %(partctype)s\n"
- )
- self.fail() # Fix the expected text above
+ self.assertEqual(contents, u"This is a test message\r\n")
def test_html_email_1(self):
with open(get_test_file("html-email-1.txt")) as email_file:
@@ -96,11 +83,9 @@ class TestScrubber(unittest.TestCase):
self.assertEqual(args[0:5], ("testlist@example.com",
"016001cd9b3b$b71efed0$255cfc70$@fr",
2, "attachment.html", "text/html"))
- self.assertEqual(len(args[5]), 5093)
+ self.assertEqual(len(args[5]), 2723)
# Scrubbed content
self.assertEqual(contents,
u"This is a test message\r\n"
- u"Non-ASCII chars: r\xe9ponse fran\xe7ais \n"
- )
- self.fail() # Fix the expected text above
+ u"Non-ASCII chars: r\xe9ponse fran\xe7ais \n")