From 3a92fce45aa5e2e0bd53a081213c725af76d72e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 10 Jan 2025 13:05:58 +0100 Subject: [PATCH 1/8] Improve import time of the `base64` module. Importing `base64` is now up to six times faster. The `re` module is now locally imported by `base64.b16decode` and is no longer implicitly exposed as `base64.re`. --- Lib/base64.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/base64.py b/Lib/base64.py index 61be4fb856e92c..ca17b9995ee658 100644 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -4,7 +4,6 @@ # Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support # Modified 22-May-2007 by Guido van Rossum to use bytes everywhere -import re import struct import binascii @@ -281,6 +280,8 @@ def b16decode(s, casefold=False): s is incorrectly padded or if there are non-alphabet characters present in the input. """ + import re + s = _bytes_from_decode_data(s) if casefold: s = s.upper() From 0b52712c2bc4b7ff3bed98c0825ef2dbfabd33c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 10 Jan 2025 13:06:57 +0100 Subject: [PATCH 2/8] blurb --- .../next/Library/2025-01-10-13-06-54.gh-issue-118761.f8oADD.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-01-10-13-06-54.gh-issue-118761.f8oADD.rst diff --git a/Misc/NEWS.d/next/Library/2025-01-10-13-06-54.gh-issue-118761.f8oADD.rst b/Misc/NEWS.d/next/Library/2025-01-10-13-06-54.gh-issue-118761.f8oADD.rst new file mode 100644 index 00000000000000..11e66e98e57323 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-01-10-13-06-54.gh-issue-118761.f8oADD.rst @@ -0,0 +1,2 @@ +Improve import time of :mod:`base64` by a factor six. This is achieved by +importing :mod:`re` on demand. Patch by Bénédikt Tran. 
From d7b6253759ead4e28ba664d0950a0c3aaeb0036a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 13 Jan 2025 13:22:54 +0100 Subject: [PATCH 3/8] Improve `base64.b16decode()` by a factor of 10. This entirely removes the need for a regex. --- Lib/base64.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Lib/base64.py b/Lib/base64.py index ca17b9995ee658..5d78cc09f40cd3 100644 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -280,12 +280,10 @@ def b16decode(s, casefold=False): s is incorrectly padded or if there are non-alphabet characters present in the input. """ - import re - s = _bytes_from_decode_data(s) if casefold: s = s.upper() - if re.search(b'[^0-9A-F]', s): + if s.translate(None, delete=b'0123456789ABCDEF'): raise binascii.Error('Non-base16 digit found') return binascii.unhexlify(s) From 9e8fe5dbc099105c6768cb3588d15209412b86df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 13 Jan 2025 13:27:09 +0100 Subject: [PATCH 4/8] update NEWS --- .../Library/2025-01-10-13-06-54.gh-issue-118761.f8oADD.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2025-01-10-13-06-54.gh-issue-118761.f8oADD.rst b/Misc/NEWS.d/next/Library/2025-01-10-13-06-54.gh-issue-118761.f8oADD.rst index 11e66e98e57323..3d9f339693028a 100644 --- a/Misc/NEWS.d/next/Library/2025-01-10-13-06-54.gh-issue-118761.f8oADD.rst +++ b/Misc/NEWS.d/next/Library/2025-01-10-13-06-54.gh-issue-118761.f8oADD.rst @@ -1,2 +1,4 @@ -Improve import time of :mod:`base64` by a factor six. This is achieved by -importing :mod:`re` on demand. Patch by Bénédikt Tran. +Improve performance of :func:`base64.b16decode` by a factor ten by +removing an un-necessary regular expression. Consequently, :mod:`re` +is no more implicitly available as ``base64.re`` and import time of +:mod:`base64` is improved by a factor six. 
Patch by Bénédikt Tran. From 5be6178e771adbd7f197fc2fc461e63c89eb021b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 13 Jan 2025 15:28:36 +0100 Subject: [PATCH 5/8] Update Misc/NEWS.d/next/Library/2025-01-10-13-06-54.gh-issue-118761.f8oADD.rst Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- .../next/Library/2025-01-10-13-06-54.gh-issue-118761.f8oADD.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2025-01-10-13-06-54.gh-issue-118761.f8oADD.rst b/Misc/NEWS.d/next/Library/2025-01-10-13-06-54.gh-issue-118761.f8oADD.rst index 3d9f339693028a..e247557795d945 100644 --- a/Misc/NEWS.d/next/Library/2025-01-10-13-06-54.gh-issue-118761.f8oADD.rst +++ b/Misc/NEWS.d/next/Library/2025-01-10-13-06-54.gh-issue-118761.f8oADD.rst @@ -1,4 +1,4 @@ Improve performance of :func:`base64.b16decode` by a factor ten by -removing an un-necessary regular expression. Consequently, :mod:`re` +removing an unnecessary regular expression. Consequently, :mod:`re` is no more implicitly available as ``base64.re`` and import time of :mod:`base64` is improved by a factor six. Patch by Bénédikt Tran. From b36d9156fe89441f35fa6c50cc744b46174c095f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 14 Jan 2025 10:25:20 +0100 Subject: [PATCH 6/8] add What's New entry --- Doc/whatsnew/3.14.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index f365db37217e95..45888c552014ed 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -708,6 +708,17 @@ asyncio reduces memory usage. (Contributed by Kumar Aditya in :gh:`107803`.) + +base64 +------ + +* Improve performance of :func:`base64.b16decode` by a factor ten by + removing an unnecessary regular expression. 
Consequently, :mod:`re` + is no more implicitly available as ``base64.re`` and import time of + :mod:`base64` is improved by a factor six. + (Contributed by Bénédikt Tran in :gh:`118761`.) + + io --- * :mod:`io` which provides the built-in :func:`open` makes less system calls From 5562c81785ada71cd2f19de995a0c855aa476f0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 14 Jan 2025 13:43:01 +0100 Subject: [PATCH 7/8] Update Doc/whatsnew/3.14.rst Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- Doc/whatsnew/3.14.rst | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 45888c552014ed..21b69c02694373 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -712,11 +712,9 @@ asyncio base64 ------ -* Improve performance of :func:`base64.b16decode` by a factor ten by - removing an unnecessary regular expression. Consequently, :mod:`re` - is no more implicitly available as ``base64.re`` and import time of - :mod:`base64` is improved by a factor six. - (Contributed by Bénédikt Tran in :gh:`118761`.) +* Improve the performance of :func:`base64.b16decode` by up to ten times, + and reduce the import time of :mod:`base64` by up to six times. + (Contributed by Bénédikt Tran, Chris Markiewicz, and Adam Turner in :gh:`118761`.) 
io From 82d18d7b522ed60b6b042d88e8046061f3dfd49f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 14 Jan 2025 13:44:06 +0100 Subject: [PATCH 8/8] Update Misc/NEWS.d/next/Library/2025-01-10-13-06-54.gh-issue-118761.f8oADD.rst Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- .../2025-01-10-13-06-54.gh-issue-118761.f8oADD.rst | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2025-01-10-13-06-54.gh-issue-118761.f8oADD.rst b/Misc/NEWS.d/next/Library/2025-01-10-13-06-54.gh-issue-118761.f8oADD.rst index e247557795d945..37c25cb2efd034 100644 --- a/Misc/NEWS.d/next/Library/2025-01-10-13-06-54.gh-issue-118761.f8oADD.rst +++ b/Misc/NEWS.d/next/Library/2025-01-10-13-06-54.gh-issue-118761.f8oADD.rst @@ -1,4 +1,5 @@ -Improve performance of :func:`base64.b16decode` by a factor ten by -removing an unnecessary regular expression. Consequently, :mod:`re` -is no more implicitly available as ``base64.re`` and import time of -:mod:`base64` is improved by a factor six. Patch by Bénédikt Tran. +Improve the performance of :func:`base64.b16decode` by up to ten times +by more efficiently checking the byte-string for hexadecimal digits. +Reduce the import time of :mod:`base64` by up to six times, +by no longer importing :mod:`re`. +Patch by Bénédikt Tran, Chris Markiewicz, and Adam Turner.