get_filename(), get_content_charset(): It's possible that the charset… · xiaolanpython/cpython@712d474 · GitHub
[go: up one dir, main page]

Skip to content

Commit 712d474

Browse files
committed
get_filename(), get_content_charset(): It's possible that the charset named in
an RFC 2231-style header could be bogus or unknown to Python. In that case, we return the text part of the parameter undecoded. However, in get_content_charset(), if that is not ascii, then it is an illegal charset and so we return failobj. Test cases and a version bump are included. Committing this to the Python 2.3 branch because I need to generate an email 2.5.6 release that contains these patches. I will port these fixes to Python 2.4 and 2.5 for email 3.x.
1 parent af0659f commit 712d474

File tree

3 files changed

+69
-9
lines changed

3 files changed

+69
-9
lines changed

Lib/email/Message.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
1-
# Copyright (C) 2001,2002 Python Software Foundation
2-
# Author: barry@zope.com (Barry Warsaw)
1+
# Copyright (C) 2001-2005 Python Software Foundation
2+
# Author: barry@python.org (Barry Warsaw)
33

4-
"""Basic message object for the email package object model.
5-
"""
4+
"""Basic message object for the email package object model."""
65

76
import re
87
import uu
@@ -728,7 +727,13 @@ def get_filename(self, failobj=None):
728727
if isinstance(filename, TupleType):
729728
# It's an RFC 2231 encoded parameter
730729
newvalue = _unquotevalue(filename)
731-
return unicode(newvalue[2], newvalue[0] or 'us-ascii')
730+
try:
731+
return unicode(newvalue[2], newvalue[0] or 'us-ascii')
732+
# LookupError can get raised if the charset isn't known to Python.
733+
# UnicodeError can get raised if the encoded text contains a
734+
# character not in the charset.
735+
except (LookupError, UnicodeError):
736+
return newvalue[2]
732737
else:
733738
newvalue = _unquotevalue(filename.strip())
734739
return newvalue
@@ -815,7 +820,18 @@ def get_content_charset(self, failobj=None):
815820
if isinstance(charset, TupleType):
816821
# RFC 2231 encoded, so decode it, and it better end up as ascii.
817822
pcharset = charset[0] or 'us-ascii'
818-
charset = unicode(charset[2], pcharset).encode('us-ascii')
823+
try:
824+
charset = unicode(charset[2], pcharset).encode('us-ascii')
825+
# LookupError can get raised if the charset isn't known to Python.
826+
# UnicodeError can get raised if the encoded text contains a
827+
# character not in the charset.
828+
except (LookupError, UnicodeError):
829+
charset = charset[2]
830+
# charset characters should be in us-ascii range
831+
try:
832+
charset = unicode(charset, 'us-ascii').encode('us-ascii')
833+
except UnicodeError:
834+
return failobj
819835
# RFC 2046, $4.1.2 says charsets are not case sensitive
820836
return charset.lower()
821837

Lib/email/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1-
# Copyright (C) 2001-2004 Python Software Foundation
1+
# Copyright (C) 2001-2005 Python Software Foundation
22
# Author: barry@python.org (Barry Warsaw)
33

44
"""A package for parsing, handling, and generating email messages.
55
"""
66

7-
__version__ = '2.5.5'
7+
__version__ = '2.5.6'
88

99
__all__ = [
1010
'base64MIME',

Lib/email/test/test_email.py

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (C) 2001,2002,2003 Python Software Foundation
1+
# Copyright (C) 2001-2005 Python Software Foundation
22
# email package unit tests
33

44
import os
@@ -2758,6 +2758,50 @@ def test_rfc2231_no_language_or_charset_in_charset(self):
27582758
self.assertEqual(msg.get_content_charset(),
27592759
'this is even more ***fun*** is it not.pdf')
27602760

2761+
def test_rfc2231_bad_encoding_in_filename(self):
2762+
m = '''\
2763+
Content-Disposition: inline;
2764+
\tfilename*0="bogus'xx'This%20is%20even%20more%20";
2765+
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
2766+
\tfilename*2="is it not.pdf"
2767+
2768+
'''
2769+
msg = email.message_from_string(m)
2770+
self.assertEqual(msg.get_filename(),
2771+
'This is even more ***fun*** is it not.pdf')
2772+
2773+
def test_rfc2231_bad_encoding_in_charset(self):
2774+
m = """\
2775+
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
2776+
2777+
"""
2778+
msg = email.message_from_string(m)
2779+
# This should return None because non-ascii characters in the charset
2780+
# are not allowed.
2781+
self.assertEqual(msg.get_content_charset(), None)
2782+
2783+
def test_rfc2231_bad_character_in_charset(self):
2784+
m = """\
2785+
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
2786+
2787+
"""
2788+
msg = email.message_from_string(m)
2789+
# This should return None because non-ascii characters in the charset
2790+
# are not allowed.
2791+
self.assertEqual(msg.get_content_charset(), None)
2792+
2793+
def test_rfc2231_bad_character_in_filename(self):
2794+
m = '''\
2795+
Content-Disposition: inline;
2796+
\tfilename*0="ascii'xx'This%20is%20even%20more%20";
2797+
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
2798+
\tfilename*2="is it not.pdf%E2"
2799+
2800+
'''
2801+
msg = email.message_from_string(m)
2802+
self.assertEqual(msg.get_filename(),
2803+
'This is even more ***fun*** is it not.pdf\xe2')
2804+
27612805

27622806

27632807
def _testclasses():

0 commit comments

Comments
 (0)
0