diff options
| author | Aurélien Bompard <aurelien@bompard.org> | 2012-10-02 18:53:55 +0200 |
|---|---|---|
| committer | Aurélien Bompard <aurelien@bompard.org> | 2012-10-02 18:53:55 +0200 |
| commit | 6cce8b8dc1501e992c3c036fb888a15db7c1bca0 (patch) | |
| tree | 767144e1653ad1d4c960c745d315885c40f3644c | |
| parent | b328be7a3260995eae650f9afb09b0225d8e4d29 (diff) | |
| download | kittystore-6cce8b8dc1501e992c3c036fb888a15db7c1bca0.tar.gz kittystore-6cce8b8dc1501e992c3c036fb888a15db7c1bca0.tar.xz kittystore-6cce8b8dc1501e992c3c036fb888a15db7c1bca0.zip | |
Handle non-ascii chars in filename
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | kittystore/scrub.py | 7 |
| -rw-r--r-- | kittystore/test/test_scrub.py | 21 |
| -rw-r--r-- | kittystore/test/testdata/attachment-4.txt | 91 |
3 files changed, 116 insertions, 3 deletions
diff --git a/kittystore/scrub.py b/kittystore/scrub.py index 175f56b..3b1532a 100644 --- a/kittystore/scrub.py +++ b/kittystore/scrub.py @@ -115,7 +115,8 @@ class Scrubber(object): ctype = part.get_content_type() # If the part is text/plain, we leave it alone if ctype == 'text/plain': - if part.get('content-disposition') == "attachment": + disposition = part.get('content-disposition') + if disposition and disposition.strip().startswith("attachment"): # part is attached self.save_attachment(part, part_num) elif ctype == 'text/html' and isinstance(sanitize, IntType): @@ -246,7 +247,7 @@ class Scrubber(object): ctype = part.get_content_type() charset = get_charset(part, default=None, guess=False) # i18n file name is encoded - filename = oneline(part.get_filename(''), charset or "ascii") + filename = oneline(part.get_filename(''), in_unicode=True) filename, fnext = os.path.splitext(filename) # For safety, we should confirm this is valid ext for content-type # but we can use fnext if we introduce fnext filtering @@ -279,7 +280,7 @@ class Scrubber(object): # Strip off leading dots filename = dre.sub('', filename) # Allow only alphanumerics, dash, underscore, and dot - filename = sre.sub('', filename) + #filename = sre.sub('', filename) # If the filename's extension doesn't match the type we guessed, # which one should we go with? For now, let's go with the one we # guessed so attachments can't lie about their type. 
Also, if the diff --git a/kittystore/test/test_scrub.py b/kittystore/test/test_scrub.py index e0e442a..b42a9d5 100644 --- a/kittystore/test/test_scrub.py +++ b/kittystore/test/test_scrub.py @@ -101,3 +101,24 @@ class TestScrubber(unittest.TestCase): self.assertEqual(contents, u'This message contains non-ascii ' u'characters:\n\xe9 \xe8 \xe7 \xe0 \xee \xef \xeb \u20ac\n') + def test_attachment_4(self): + with open(get_test_file("attachment-4.txt")) as email_file: + msg = email.message_from_file(email_file) + store = Mock() + scrubber = Scrubber("testlist@example.com", msg, store) + contents = scrubber.scrub() + self.assertEqual(store.add_attachment.call_count, 2) + args_1, args_2 = store.add_attachment.call_args_list + # HTML part + self.assertEqual(args_1[0][0:6], ("testlist@example.com", + "CAHmoxtXXb3un1C=ZvYNtz-eYghm-GH925gDVHyjhvL2YEsZ-Yw@mail.gmail.com", + 3, "attachment.html", "text/html", "iso-8859-1")) + self.assertEqual(len(args_1[0][6]), 114) + # Image attachment + self.assertEqual(args_2[0][0:6], ("testlist@example.com", + "CAHmoxtXXb3un1C=ZvYNtz-eYghm-GH925gDVHyjhvL2YEsZ-Yw@mail.gmail.com", + 4, u"todo-déjeuner.txt", "text/plain", "utf-8")) + self.assertEqual(len(args_2[0][6]), 112) + # Scrubbed content + self.assertEqual(contents, u"This is a test message\r\n") + diff --git a/kittystore/test/testdata/attachment-4.txt b/kittystore/test/testdata/attachment-4.txt new file mode 100644 index 0000000..902464c --- /dev/null +++ b/kittystore/test/testdata/attachment-4.txt @@ -0,0 +1,91 @@ +Return-Path: <list1-bounces@mm3test.fedoraproject.org>
+Delivered-To: test@example.com
+Received: (qmail 13691 invoked from network); 2 Oct 2012 15:52:27 -0000
+Received: from mx16-g26.free.fr (HELO mm3test.fedoraproject.org) (212.27.42.55)
+ by mrelay1-g25.free.fr with SMTP; 2 Oct 2012 15:52:27 -0000
+Received: from mm3test.fedoraproject.org ([152.19.134.144])
+ by mx1-g20.free.fr (MXproxy) for test@example.com;
+ Tue, 2 Oct 2012 17:52:27 +0200 (CEST)
+X-ProXaD-SC: state=HAM score=0
+Received: from vm5.fedora.ibiblio.org (localhost [127.0.0.1])
+ by mm3test.fedoraproject.org (Postfix) with ESMTP id DC46212057E
+ for <test@example.com>; Tue, 2 Oct 2012 15:52:13 +0000 (UTC)
+Received: from mail-qa0-f53.google.com (mail-qa0-f53.google.com
+ [209.85.216.53])
+ by mm3test.fedoraproject.org (Postfix) with ESMTP id E321F120502
+ for <list1@mm3test.fedoraproject.org>;
+ Tue, 2 Oct 2012 15:50:18 +0000 (UTC)
+Received: by qaas11 with SMTP id s11so719463qaa.5
+ for <list1@mm3test.fedoraproject.org>;
+ Tue, 02 Oct 2012 08:50:21 -0700 (PDT)
+DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113;
+ h=mime-version:sender:from:date:x-google-sender-auth:message-id
+ :subject:to:content-type;
+ bh=e8BR+8jPBSP5O/RrgP1pqvO1/IMWF11Bh461y3FF+3I=;
+ b=lp4ST+8RSmk0O8ka9D0rbVk1MH02MzuQm/BCpqSW6ypbSCoSQZYIO3wCU0WFGZJMTu
+ W+b13GCmhThG4REHa5m8Liok+TmrAdG1eJm0SAihjZTTbDMsxeXbFIqYm33cWFeoqqpv
+ a9U2b45t7oQScuwZJdnMnK6LKgSH8LnSBuQJQ2nbPl8QlIkUuyyytJqZ9n/bar1VsAMS
+ yUQhMLhakWsBRvOQL4YkSMa8QZB7bjsT+OBJs2lWfOeDZMRbaMMw99Iri5ekXW9YChES
+ +YgVFWkpzY8UgCl1v/4U+f8d6B/V+UxC4QGMt6/t2WVaWNvLfcVK6zf4Xz0j6RxoiNb1
+ +elA==
+Received: by 10.49.48.109 with SMTP id k13mr5822578qen.44.1349193020870; Tue,
+ 02 Oct 2012 08:50:20 -0700 (PDT)
+MIME-Version: 1.0
+Received: by 10.49.117.231 with HTTP; Tue, 2 Oct 2012 08:50:00 -0700 (PDT)
+From: Aurelien Bompard <test@example.com>
+Date: Tue, 2 Oct 2012 17:50:00 +0200
+X-Google-Sender-Auth: nM2EQKgDrBvnGOuMzxnkzkPC9ro
+Message-ID: <CAHmoxtXXb3un1C=ZvYNtz-eYghm-GH925gDVHyjhvL2YEsZ-Yw@mail.gmail.com>
+To: list1@mm3test.fedoraproject.org
+Content-Type: multipart/mixed; boundary=047d7b6d9730be0f6104cb157a6f
+X-MailFrom: abompard@gmail.com
+X-Mailman-Rule-Hits: nonmember-moderation
+X-Mailman-Rule-Misses: approved; emergency; loop; member-moderation;
+ administrivia; implicit-dest; max-recipients; max-size;
+ news-moderation; no-subject; suspicious-header
+X-Message-ID-Hash: MAWB5CJS67HFZZUBFWN47URCDJBI75WT
+X-Mailman-Approved-At: Tue, 02 Oct 2012 15:52:11 +0000
+Subject: [List1] another try
+X-Mailman-Version: 3.0.0b2+
+Precedence: list
+List-Id: <list1.mm3test.fedoraproject.org>
+List-Post: <mailto:list1@mm3test.fedoraproject.org>
+List-Subscribe: <http://mm3test.fedoraproject.org/listinfo/list1@mm3test.fedoraproject.org>,
+ <mailto:list1-join@mm3test.fedoraproject.org>
+Archived-At: /hyperkitty/message/list1@mm3test.fedoraproject.org/3EGA7GL3TZXZRHSVS42I3VR63YPHV7I3/
+List-Unsubscribe: <http://mm3test.fedoraproject.org/listinfo/list1@mm3test.fedoraproject.org>,
+ <mailto:list1-leave@mm3test.fedoraproject.org>
+List-Archive: </hyperkitty/archives/list1@mm3test.fedoraproject.org/>
+List-Help: <mailto:list1-request@mm3test.fedoraproject.org?subject=help>
+
+--047d7b6d9730be0f6104cb157a6f
+Content-Type: multipart/alternative; boundary=047d7b6d9730be0f5d04cb157a6d
+
+--047d7b6d9730be0f5d04cb157a6d
+Content-Type: text/plain; charset=ISO-8859-1
+Content-Transfer-Encoding: quoted-printable
+
+This is a test, HTML message with accented letters : =E9 =E8 =E7 =E0.
+And an attachment with an accented filename
+
+--047d7b6d9730be0f5d04cb157a6d
+Content-Type: text/html; charset=ISO-8859-1
+Content-Transfer-Encoding: quoted-printable
+
+This is a test, HTML message with accented letters : =E9 =E8 =E7 =E0.<br>An=
+d an attachment with an accented filename<br>
+
+--047d7b6d9730be0f5d04cb157a6d--
+--047d7b6d9730be0f6104cb157a6f
+Content-Type: text/plain; charset=UTF-8;
+ name="=?ISO-8859-1?Q?todo=2Dd=E9jeuner=2Etxt?="
+Content-Disposition: attachment;
+ filename="=?ISO-8859-1?Q?todo=2Dd=E9jeuner=2Etxt?="
+Content-Transfer-Encoding: base64
+X-Attachment-Id: f_h7t6o6wf0
+
+VmlhbmRlCk1lbnRoZQpQYWluClZpbgoKQ3Vpc2luZTogcHLDqXBhcmVyIGwnYXDDqXJvLCBjb3Vw
+ZXIgZXQgZmFpcmUgcmlzc29sZXIgbGVzIHBhdGF0ZXMsIGV0IGZhaXJlIGxlcyBjb29raWVzCg==
+--047d7b6d9730be0f6104cb157a6f--
+
+
|
