diff options
author | Aurélien Bompard <aurelien@bompard.org> | 2012-10-01 12:28:47 +0200 |
---|---|---|
committer | Aurélien Bompard <aurelien@bompard.org> | 2012-10-01 12:28:47 +0200 |
commit | e012672451eda6293fd6817036d4dea907f63a4c (patch) | |
tree | 0fbc36853a6a3051fde7172aa535303437e95fb2 | |
parent | 986ed148c065ae626f6f82f2d93a9b2e346a6e3c (diff) | |
download | kittystore-e012672451eda6293fd6817036d4dea907f63a4c.tar.gz kittystore-e012672451eda6293fd6817036d4dea907f63a4c.tar.xz kittystore-e012672451eda6293fd6817036d4dea907f63a4c.zip |

Improve scrubbing

- Don't escape HTML contents; that is the frontend's job.
- Don't insert placeholder text when scrubbing attachments.


-rw-r--r-- | kittystore/scrub.py | 44 |
-rw-r--r-- | kittystore/test/test_scrub.py | 29 |

2 files changed, 31 insertions, 42 deletions

```diff
diff --git a/kittystore/scrub.py b/kittystore/scrub.py
index 0b11963..04b30e5 100644
--- a/kittystore/scrub.py
+++ b/kittystore/scrub.py
@@ -128,21 +128,22 @@ class Scrubber(object):
 #"""), lcset)
 #                else:
                 if sanitize == 1:
-                    # HTML-escape it and store it as an attachment, but make it
-                    # look a /little/ bit prettier. :(
-                    payload = websafe(part.get_payload(decode=True))
-                    # For whitespace in the margin, change spaces into
-                    # non-breaking spaces, and tabs into 8 of those. Then use a
-                    # mono-space font. Still looks hideous to me, but then I'd
-                    # just as soon discard them.
-                    def doreplace(s):
-                        return s.expandtabs(8).replace(' ', '&nbsp;')
-                    lines = [doreplace(s) for s in payload.split('\n')]
-                    payload = '<tt>\n' + BR.join(lines) + '\n</tt>\n'
-                    part.set_payload(payload)
-                    # We're replacing the payload with the decoded payload so this
-                    # will just get in the way.
-                    del part['content-transfer-encoding']
+                    # Don't HTML-escape it, this is the frontend's job
+                    ## HTML-escape it and store it as an attachment, but make it
+                    ## look a /little/ bit prettier. :(
+                    #payload = websafe(part.get_payload(decode=True))
+                    ## For whitespace in the margin, change spaces into
+                    ## non-breaking spaces, and tabs into 8 of those. Then use a
+                    ## mono-space font. Still looks hideous to me, but then I'd
+                    ## just as soon discard them.
+                    #def doreplace(s):
+                    #    return s.expandtabs(8).replace(' ', '&nbsp;')
+                    #lines = [doreplace(s) for s in payload.split('\n')]
+                    #payload = '<tt>\n' + BR.join(lines) + '\n</tt>\n'
+                    #part.set_payload(payload)
+                    ## We're replacing the payload with the decoded payload so this
+                    ## will just get in the way.
+                    #del part['content-transfer-encoding']
                     self.save_attachment(part, part_num, filter_html=False)
                     part.set_payload('')
             elif ctype == 'message/rfc822':
@@ -188,7 +189,7 @@ class Scrubber(object):
                 partctype = part.get_content_type()
                 if partctype <> 'text/plain' and (partctype <> 'text/html' or
                                                   sanitize <> 2):
-                    text.append(_('Skipped content of type %(partctype)s\n'))
+                    #text.append(_('Skipped content of type %(partctype)s\n'))
                     continue
                 try:
                     t = part.get_payload(decode=True) or ''
@@ -222,8 +223,9 @@ class Scrubber(object):
                         t += '\n'
                     text.append(t)
             # Now join the text and set the payload
-            sep = _('-------------- next part --------------\n')
-            text = sep.join(text)
+            #sep = _('-------------- next part --------------\n')
+            #text = sep.join(text)
+            text = "\n".join(text)
         return text
 
 
@@ -281,8 +283,10 @@ class Scrubber(object):
         # TODO: bring back the HTML sanitizer feature
         if ctype == 'message/rfc822':
             submsg = part.get_payload()
-            # BAW: I'm sure we can eventually do better than this. :(
-            decodedpayload = websafe(str(submsg))
+            # Don't HTML-escape it, this is the frontend's job
+            ## BAW: I'm sure we can eventually do better than this. :(
+            #decodedpayload = websafe(str(submsg))
+            decodedpayload = str(submsg)
         msg_id = self.msg['Message-Id'].strip("<>")
         self.store.add_attachment(
                 self.mlist, msg_id, counter, filebase+ext,
diff --git a/kittystore/test/test_scrub.py b/kittystore/test/test_scrub.py
index d590709..8637e97 100644
--- a/kittystore/test/test_scrub.py
+++ b/kittystore/test/test_scrub.py
@@ -25,13 +25,9 @@ class TestScrubber(unittest.TestCase):
                 'end:vcard\r\n\r\n')
         self.assertEqual(contents,
                 "This is a test message.\r\n\r\n"
-                "-------------- next part --------------\n"
-                "Skipped content of type %(partctype)s\n"
-                "-------------- next part --------------\n"
-                "-- \ndevel mailing list\ndevel@lists.fedoraproject.org\n"
+                "\n-- \ndevel mailing list\ndevel@lists.fedoraproject.org\n"
                 "https://admin.fedoraproject.org/mailman/listinfo/devel\n"
                 )
-        self.fail() # Fix the expected text above
 
     def test_attachment_2(self):
         with open(get_test_file("attachment-2.txt")) as email_file:
@@ -49,14 +45,10 @@ class TestScrubber(unittest.TestCase):
                 'z394AnmMnQCcC+6tWcqE1dPQmIdRbLXgKGVp\r\nEeUAn2OqtaXaXaQV7rx+'
                 'SmOldmSzcFw4\r\n=OEJv\r\n-----END PGP SIGNATURE-----\r\n')
         self.assertEqual(contents,
-                u"This is a test message\r\nNon-ascii chars: Hofm\xfchlgasse"
-                u"\r\n-------------- next part --------------\n"
-                u"Skipped content of type %(partctype)s\n"
-                u"-------------- next part --------------\n"
-                u"-- \ndevel mailing list\ndevel@lists.fedoraproject.org\n"
+                u"This is a test message\r\nNon-ascii chars: Hofm\xfchlgasse\r\n"
+                u"\n-- \ndevel mailing list\ndevel@lists.fedoraproject.org\n"
                 u"https://admin.fedoraproject.org/mailman/listinfo/devel\n"
                 )
-        self.fail() # Fix the expected text above
 
     def test_attachment_3(self):
         with open(get_test_file("attachment-3.txt")) as email_file:
@@ -70,19 +62,14 @@ class TestScrubber(unittest.TestCase):
         self.assertEqual(args_1[0][0:5], ("testlist@example.com",
             "CACec3Lup8apbhUMcm_Ktn1dPxx4eWr2y1RV7ZSYhy0tzmjSrgQ@mail.gmail.com",
             3, "attachment.html", "text/html"))
-        self.assertEqual(len(args_1[0][5]), 5812)
+        self.assertEqual(len(args_1[0][5]), 3134)
         # Image attachment
         self.assertEqual(args_2[0][0:5], ("testlist@example.com",
             "CACec3Lup8apbhUMcm_Ktn1dPxx4eWr2y1RV7ZSYhy0tzmjSrgQ@mail.gmail.com",
             4, "GeoffreyRoucourt.jpg", "image/jpeg"))
         self.assertEqual(len(args_2[0][5]), 282180)
         # Scrubbed content
-        self.assertEqual(contents,
-                u"This is a test message\r\n"
-                u"-------------- next part --------------\n"
-                u"Skipped content of type %(partctype)s\n"
-                )
-        self.fail() # Fix the expected text above
+        self.assertEqual(contents, u"This is a test message\r\n")
 
     def test_html_email_1(self):
         with open(get_test_file("html-email-1.txt")) as email_file:
@@ -96,11 +83,9 @@ class TestScrubber(unittest.TestCase):
         self.assertEqual(args[0:5], ("testlist@example.com",
             "016001cd9b3b$b71efed0$255cfc70$@fr",
             2, "attachment.html", "text/html"))
-        self.assertEqual(len(args[5]), 5093)
+        self.assertEqual(len(args[5]), 2723)
         # Scrubbed content
         self.assertEqual(contents,
                 u"This is a test message\r\n"
-                u"Non-ASCII chars: r\xe9ponse fran\xe7ais \n"
-                )
-        self.fail() # Fix the expected text above
+                u"Non-ASCII chars: r\xe9ponse fran\xe7ais \n")
 
```