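Optimization: in get_payload(), encode the payload with 'surrogateescape' once inside a try block instead of pre-validating it with the separate _has_decoded_with_surrogateescape() helper, which is removed.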

python · serhiy-storchaka · Dec 11, 2023 · Jul 7, 2022 · Jul 7, 2022 · Jul 6, 2022
commit 36619d5871c5eb192ee6cf6eaaaaaf4bf9f0e989
@@ -289,25 +289,26 @@ def get_payload(self, i=None, decode=False):
         # cte might be a Header, so for now stringify it.
         cte = str(self.get('content-transfer-encoding', '')).lower()
         # payload may be bytes here.
-        if isinstance(payload, str):
-            if utils._has_decoded_with_surrogateescape(payload):
-                bpayload = payload.encode('ascii', 'surrogateescape')
-                if not decode:
+        if not decode:
+            if isinstance(payload, str) and utils._has_surrogates(payload):
+                try:
+                    bpayload = payload.encode('ascii', 'surrogateescape')
                     try:
                         payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace')
                     except LookupError:
                         payload = bpayload.decode('ascii', 'replace')
-            elif decode:
-                try:
-                    bpayload = payload.encode('ascii')
-                except UnicodeError:
-                    # This won't happen for RFC compliant messages (messages
-                    # containing only ASCII code points in the unicode input).
-                    # If it does happen, turn the string into bytes in a way
-                    # guaranteed not to fail.
-                    bpayload = payload.encode('raw-unicode-escape')
-        if not decode:
+                except UnicodeEncodeError:
+                    pass
             return payload
+        if isinstance(payload, str):
+            try:
+                bpayload = payload.encode('ascii', 'surrogateescape')
+            except UnicodeEncodeError:
+                # This won't happen for RFC compliant messages (messages
+                # containing only ASCII code points in the unicode input).
+                # If it does happen, turn the string into bytes in a way
+                # guaranteed not to fail.
+                bpayload = payload.encode('raw-unicode-escape')
         if cte == 'quoted-printable':
             return quopri.decodestring(bpayload)
         elif cte == 'base64':

@@ -47,27 +47,13 @@ def _has_surrogates(s):
     """Return True if s may contain surrogate-escaped binary data."""
     # This check is based on the fact that unless there are surrogates, utf8
     # (Python's default encoding) can encode any string.  This is the fastest
-    # way to check for surrogates, see issue 11454 (moved to GH 55663) for timings.
-    # This will pass some strings that are not valid for surrogateescape encoding.
+    # way to check for surrogates, see bpo-11454 (moved to gh-55663) for timings.
     try:
         s.encode()
         return False
     except UnicodeEncodeError:
         return True
 
-def _has_decoded_with_surrogateescape(s):
-    """Return True if s is a valid str decoded using surrogateescape"""
-    # Slower test than _has_surrogates to be used when the string must
-    # be encodable with surrogateescape, but is no slower if the string
-    # does not have any unicode surrogate characters.
-    if _has_surrogates(s):
-        try:
-            s.encode('ascii', 'surrogateescape')
-        except UnicodeEncodeError:
-            return False
-        return True
-    return False
-
 # How to deal with a string containing bytes before handing it to the
 # application through the 'normal' interface.
 def _sanitize(string):