gh-118761: Optimise import time for ``string`` by AA-Turner · Pull Request #132037 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

gh-118761: Optimise import time for string #132037

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 8, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Optimise import time for string
  • Loading branch information
AA-Turner committed Apr 3, 2025
commit ec78492ef1ae8db70d6b85c03310fad27af6170d
64 changes: 39 additions & 25 deletions Lib/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,29 +49,20 @@ def capwords(s, sep=None):


####################################################################
import re as _re
from collections import ChainMap as _ChainMap

_sentinel_flags = object()
_sentinel_dict = {}

class Template:
"""A string class for supporting $-substitutions."""

delimiter = '$'
# r'[a-z]' matches to non-ASCII letters when used with IGNORECASE, but
# without the ASCII flag. We can't add re.ASCII to flags because of
# backward compatibility. So we use the ?a local flag and [a-z] pattern.
# See https://bugs.python.org/issue31672
idpattern = r'(?a:[_a-z][_a-z0-9]*)'
braceidpattern = None
flags = _re.IGNORECASE

def __init_subclass__(cls):
super().__init_subclass__()
if 'pattern' in cls.__dict__:
class _TemplatePattern:
def __get__(self, instance, cls=None):
if cls is None:
return self
import re
if ('pattern' in cls.__dict__
and not isinstance(cls.__dict__['pattern'], _TemplatePattern)):
pattern = cls.pattern
else:
delim = _re.escape(cls.delimiter)
delim = re.escape(cls.delimiter)
id = cls.idpattern
bid = cls.braceidpattern or cls.idpattern
pattern = fr"""
Expand All @@ -82,7 +73,32 @@ def __init_subclass__(cls):
(?P<invalid>) # Other ill-formed delimiter exprs
)
"""
cls.pattern = _re.compile(pattern, cls.flags | _re.VERBOSE)
if cls.flags is _sentinel_flags:
cls.flags = re.IGNORECASE
pattern = re.compile(pattern, cls.flags | re.VERBOSE)
# replace this descriptor with the compiled pattern
setattr(cls, 'pattern', pattern)
return pattern


class Template:
"""A string class for supporting $-substitutions."""

delimiter = '$'
# r'[a-z]' matches to non-ASCII letters when used with IGNORECASE, but
# without the ASCII flag. We can't add re.ASCII to flags because of
# backward compatibility. So we use the ?a local flag and [a-z] pattern.
# See https://bugs.python.org/issue31672
idpattern = r'(?a:[_a-z][_a-z0-9]*)'
braceidpattern = None
flags = _sentinel_flags # default: re.IGNORECASE

# use a descriptor to be able to defer the import of `re`, for performance
pattern = _TemplatePattern()

def __init_subclass__(cls):
super().__init_subclass__()
cls.pattern = _TemplatePattern()

def __init__(self, template):
self.template = template
Expand All @@ -105,7 +121,8 @@ def substitute(self, mapping=_sentinel_dict, /, **kws):
if mapping is _sentinel_dict:
mapping = kws
elif kws:
mapping = _ChainMap(kws, mapping)
from collections import ChainMap
mapping = ChainMap(kws, mapping)
# Helper function for .sub()
def convert(mo):
# Check the most common path first.
Expand All @@ -124,7 +141,8 @@ def safe_substitute(self, mapping=_sentinel_dict, /, **kws):
if mapping is _sentinel_dict:
mapping = kws
elif kws:
mapping = _ChainMap(kws, mapping)
from collections import ChainMap
mapping = ChainMap(kws, mapping)
# Helper function for .sub()
def convert(mo):
named = mo.group('named') or mo.group('braced')
Expand Down Expand Up @@ -170,10 +188,6 @@ def get_identifiers(self):
self.pattern)
return ids

# Initialize Template.pattern. __init_subclass__() is automatically called
# only for subclasses, not for the Template class itself.
Template.__init_subclass__()


########################################################################
# the Formatter class
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Improve import times by up to 27x for the :mod:`string` module.
Patch by Adam Turner.
Loading
0