From 25653dbb1fa0770acae6dbecd4ac73ac1bb9cb57 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 26 Jun 2025 16:47:46 +0300 Subject: [PATCH 1/3] gh-105636: Add re.Pattern.compile_template() --- Doc/library/re.rst | 43 ++++++++++- Doc/whatsnew/3.15.rst | 9 +++ Lib/re/__init__.py | 12 ++- Lib/test/test_re.py | 76 +++++++++++++++++++ ...-06-26-18-16-44.gh-issue-105636.Jp6IHC.rst | 2 + Modules/_sre/clinic/sre.c.h | 47 +++++++++++- Modules/_sre/sre.c | 73 +++++++++++++++--- 7 files changed, 246 insertions(+), 16 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-26-18-16-44.gh-issue-105636.Jp6IHC.rst diff --git a/Doc/library/re.rst b/Doc/library/re.rst index 75ebbf11c8e47c..7ae1d30568f81b 100644 --- a/Doc/library/re.rst +++ b/Doc/library/re.rst @@ -1064,7 +1064,9 @@ Functions Return the string obtained by replacing the leftmost non-overlapping occurrences of *pattern* in *string* by the replacement *repl*. If the pattern isn't found, - *string* is returned unchanged. *repl* can be a string or a function; if it is + *string* is returned unchanged. + *repl* can be a string, a :ref:`template object <template-objects>`, + or a callable; if it is a string, any backslash escapes in it are processed. That is, ``\n`` is converted to a single newline character, ``\r`` is converted to a carriage return, and so forth. Unknown escapes of ASCII letters are reserved for future use and @@ -1093,6 +1095,13 @@ Functions The pattern may be a string or a :class:`~re.Pattern`. + The replacement string can be compiled as well as the pattern:: + + >>> pat = re.compile(r'def\s+([a-zA-Z_][a-zA-Z_0-9]*)\s*\(\s*\):') + >>> repl = pat.compile_template(r'static PyObject*\npy_\1(void)\n{') + >>> re.sub(pat, repl, 'def myfunc():') + 'static PyObject*\npy_myfunc(void)\n{' + The optional argument *count* is the maximum number of pattern occurrences to be replaced; *count* must be a non-negative integer. If omitted or zero, all occurrences will be replaced. 
@@ -1143,6 +1152,9 @@ Functions In future Python versions they will be :ref:`keyword-only parameters <keyword-only_parameter>`. + .. versionchanged:: next + *repl* can be compiled. + .. function:: subn(pattern, repl, string, count=0, flags=0) @@ -1337,6 +1349,16 @@ Regular Expression Objects Identical to the :func:`subn` function, using the compiled pattern. +.. method:: Pattern.compile_template(repl) + + Compile a replacement string into a :ref:`template object + <template-objects>`, which can be used for replacing patterns in strings + using functions :re:`re.sub` or :re:`re.subn` or corresponding methods of + the :ref:`pattern object <re-objects>`. + + .. versionadded:: next + + .. attribute:: Pattern.flags The regex matching flags. This is a combination of the flags given to @@ -1586,6 +1608,25 @@ when there is no match, you can test whether there was a match with a simple are considered atomic. +.. _template-objects: + +Template Objects +---------------- + +A replacement string can be compiled into a template object using the :meth:`~re.Pattern.compile_template` method. + +.. versionadded:: next + +Template object is a callable which takes a single :ref:`match object +<match-objects>` argument, and returns the replacement string with group +references resolved. + + >>> pat = re.compile('(.)(.)') + >>> templ = pat.compile_template(r'\2-\1') + >>> templ(pat.match('ab')) + 'b-a' + + .. _re-examples: Regular Expression Examples diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 9f327cf904da1b..ef95a381ff91a2 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -126,6 +126,15 @@ os.path (Contributed by Petr Viktorin for :cve:`2025-4517`.) +re +-- + +* Add the :meth:`~re.Pattern.compile_template` method for the + :ref:`pattern object <re-objects>` which allows to pre-compile + replacement strings. + (Contributed by Serhiy Storchaka in :gh:`105636`.) 
+ + shelve ------ diff --git a/Lib/re/__init__.py b/Lib/re/__init__.py index af2808a77da691..4879972303db7c 100644 --- a/Lib/re/__init__.py +++ b/Lib/re/__init__.py @@ -132,7 +132,7 @@ __all__ = [ "match", "fullmatch", "search", "sub", "subn", "split", "findall", "finditer", "compile", "purge", "escape", - "error", "Pattern", "Match", "A", "I", "L", "M", "S", "X", "U", + "error", "Pattern", "Match", "Template", "A", "I", "L", "M", "S", "X", "U", "ASCII", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE", "UNICODE", "NOFLAG", "RegexFlag", "PatternError" ] @@ -312,8 +312,12 @@ def escape(pattern): pattern = str(pattern, 'latin1') return pattern.translate(_special_chars_map).encode('latin1') -Pattern = type(_compiler.compile('', 0)) -Match = type(_compiler.compile('', 0).match('')) +p = _compiler.compile('', 0) +Pattern = type(p) +Match = type(p.match('')) +import _sre +Template = type(_sre.template(p, [''])) +del p # -------------------------------------------------------------------- # internals @@ -374,6 +378,8 @@ def _compile(pattern, flags): @functools.lru_cache(_MAXCACHE) def _compile_template(pattern, repl): # internal: compile replacement pattern + if isinstance(repl, Template): + return repl return _sre.template(pattern, _parser.parse_template(repl, pattern)) # register myself for pickling diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index e9128ac1d9762d..c01cdf6b558866 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -2889,6 +2889,79 @@ def test_flags_repr(self): "re.ASCII|re.LOCALE|re.UNICODE|re.MULTILINE|re.DEBUG|0xffe01") +class TemplateTests(unittest.TestCase): + def test_literal(self): + p = re.compile(r'\w') + t = p.compile_template('a') + self.assertIsInstance(t, re.Template) + self.assertEqual(re.sub(p, t, 'x-yz'), 'a-aa') + self.assertEqual(p.sub(t, 'x-yz'), 'a-aa') + self.assertEqual(re.subn(p, t, 'x-yz', count=2), ('a-az', 2)) + self.assertEqual(p.subn(t, 'x-yz', 2), ('a-az', 2)) + + p = re.compile(br'\w') + t = 
p.compile_template(b'a') + self.assertIsInstance(t, re.Template) + self.assertEqual(re.sub(p, t, b'x-yz'), b'a-aa') + self.assertEqual(p.sub(t, b'x-yz'), b'a-aa') + self.assertEqual(re.subn(p, t, b'x-yz', count=2), (b'a-az', 2)) + self.assertEqual(p.subn(t, b'x-yz', 2), (b'a-az', 2)) + + def test_group_refs(self): + p = re.compile(r'(\w)(\w)') + t = p.compile_template(r'[\2-\1]') + self.assertIsInstance(t, re.Template) + self.assertEqual(re.sub(p, t, 'xyzt'), '[y-x][t-z]') + self.assertEqual(p.sub(t, 'xyzt'), '[y-x][t-z]') + + p = re.compile(br'(\w)(\w)') + t = p.compile_template(br'[\2-\1]') + self.assertIsInstance(t, re.Template) + self.assertEqual(re.sub(p, t, b'xyzt'), b'[y-x][t-z]') + self.assertEqual(p.sub(t, b'xyzt'), b'[y-x][t-z]') + + def test_group_refs_emplty_literals(self): + p = re.compile(r'(\w)(\w)') + t = p.compile_template(r'\2\1') + self.assertIsInstance(t, re.Template) + self.assertEqual(re.sub(p, t, 'xyzt'), 'yxtz') + self.assertEqual(p.sub(t, 'xyzt'), 'yxtz') + + p = re.compile(br'(\w)(\w)') + t = p.compile_template(br'\2\1') + self.assertIsInstance(t, re.Template) + self.assertEqual(re.sub(p, t, b'xyzt'), b'yxtz') + self.assertEqual(p.sub(t, b'xyzt'), b'yxtz') + + def test_symbolic_group_refs(self): + p = re.compile(r'(?P<first>\w)(?P<second>\w)') + t = p.compile_template(r'[\g<second>-\g<first>]') + self.assertIsInstance(t, re.Template) + self.assertEqual(re.sub(p, t, 'xyzt'), '[y-x][t-z]') + self.assertEqual(p.sub(t, 'xyzt'), '[y-x][t-z]') + + p = re.compile(br'(?P<first>\w)(?P<second>\w)') + t = p.compile_template(br'[\g<second>-\g<first>]') + self.assertIsInstance(t, re.Template) + self.assertEqual(re.sub(p, t, b'xyzt'), b'[y-x][t-z]') + self.assertEqual(p.sub(t, b'xyzt'), b'[y-x][t-z]') + + def test_call(self): + p = re.compile(r'(\w)(\w)') + t = p.compile_template(r'[\2-\1]') + m = p.search(' xy ') + self.assertEqual(t(m), '[y-x]') + self.assertRaises(TypeError, t, None) + self.assertRaises(TypeError, t, {}) + + p = re.compile(br'(\w)(\w)') + t = p.compile_template(br'[\2-\1]') + m = p.search(b' 
xy ') + self.assertEqual(t(m), b'[y-x]') + self.assertRaises(TypeError, t, None) + self.assertRaises(TypeError, t, {}) + + class ImplementationTest(unittest.TestCase): """ Test implementation details of the re module. @@ -2901,6 +2974,8 @@ def test_immutable(self): re.Match.foo = 1 with self.assertRaises(TypeError): re.Pattern.foo = 1 + with self.assertRaises(TypeError): + re.Template.foo = 1 with self.assertRaises(TypeError): pat = re.compile("") tp = type(pat.scanner("")) @@ -2924,6 +2999,7 @@ def test_disallow_instantiation(self): # Ensure that the type disallows instantiation (bpo-43916) check_disallow_instantiation(self, re.Match) check_disallow_instantiation(self, re.Pattern) + check_disallow_instantiation(self, re.Template) pat = re.compile("") check_disallow_instantiation(self, type(pat.scanner(""))) diff --git a/Misc/NEWS.d/next/Library/2025-06-26-18-16-44.gh-issue-105636.Jp6IHC.rst b/Misc/NEWS.d/next/Library/2025-06-26-18-16-44.gh-issue-105636.Jp6IHC.rst new file mode 100644 index 00000000000000..b790c8ef2abeee --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-26-18-16-44.gh-issue-105636.Jp6IHC.rst @@ -0,0 +1,2 @@ +Add the :meth:`~re.Pattern.compile_template` method for the :ref:`pattern +object <re-objects>` which allows to pre-compile replacement strings. 
diff --git a/Modules/_sre/clinic/sre.c.h b/Modules/_sre/clinic/sre.c.h index d2f25a71495cda..c8ac80c7918011 100644 --- a/Modules/_sre/clinic/sre.c.h +++ b/Modules/_sre/clinic/sre.c.h @@ -978,6 +978,51 @@ _sre_SRE_Pattern_subn(PyObject *self, PyTypeObject *cls, PyObject *const *args, return return_value; } +PyDoc_STRVAR(_sre_SRE_Pattern_compile_template__doc__, +"compile_template($self, repl, /)\n" +"--\n" +"\n" +"Compile a replacement string into a template object."); + +#define _SRE_SRE_PATTERN_COMPILE_TEMPLATE_METHODDEF \ + {"compile_template", _PyCFunction_CAST(_sre_SRE_Pattern_compile_template), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_compile_template__doc__}, + +static PyObject * +_sre_SRE_Pattern_compile_template_impl(PatternObject *self, + PyTypeObject *cls, PyObject *repl); + +static PyObject * +_sre_SRE_Pattern_compile_template(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + # define KWTUPLE (PyObject *)&_Py_SINGLETON(tuple_empty) + #else + # define KWTUPLE NULL + #endif + + static const char * const _keywords[] = {"", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "compile_template", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + PyObject *repl; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + repl = args[0]; + return_value = _sre_SRE_Pattern_compile_template_impl((PatternObject *)self, cls, repl); + +exit: + return return_value; +} + PyDoc_STRVAR(_sre_SRE_Pattern___copy____doc__, "__copy__($self, /)\n" "--\n" @@ -1568,4 +1613,4 @@ _sre_SRE_Scanner_search(PyObject *self, PyTypeObject *cls, PyObject *const *args #ifndef _SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF #define _SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF #endif /* 
!defined(_SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF) */ -/*[clinic end generated code: output=bbf42e1de3bdd3ae input=a9049054013a1b77]*/ +/*[clinic end generated code: output=76344e4385bd1406 input=a9049054013a1b77]*/ diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c index 602d0ab8588f62..446b2813cf1a31 100644 --- a/Modules/_sre/sre.c +++ b/Modules/_sre/sre.c @@ -43,6 +43,7 @@ static const char copyright[] = #include "pycore_dict.h" // _PyDict_Next() #include "pycore_long.h" // _PyLong_GetZero() #include "pycore_moduleobject.h" // _PyModule_GetState() +#include "pycore_modsupport.h" // _PyArg_NoKeywords() #include "pycore_unicodeobject.h" // _PyUnicode_Copy #include "sre.h" // SRE_CODE @@ -1247,7 +1248,9 @@ pattern_subx(_sremodulestate* module_state, if (PyCallable_Check(ptemplate)) { /* sub/subn takes either a function or a template */ filter = Py_NewRef(ptemplate); - filter_type = CALLABLE; + filter_type = (Py_TYPE(filter) == module_state->Template_Type) + ? TEMPLATE + : CALLABLE; } else { /* if not callable, check if it's a literal string */ int literal; @@ -1273,15 +1276,15 @@ pattern_subx(_sremodulestate* module_state, if (!filter) return NULL; - assert(Py_TYPE(filter) == module_state->Template_Type); - if (Py_SIZE(filter) == 0) { - Py_SETREF(filter, - Py_NewRef(((TemplateObject *)filter)->literal)); - filter_type = LITERAL; - } - else { - filter_type = TEMPLATE; - } + filter_type = TEMPLATE; + } + } + if (filter_type == TEMPLATE) { + assert(Py_TYPE(filter) == module_state->Template_Type); + if (Py_SIZE(filter) == 0) { + Py_SETREF(filter, + Py_NewRef(((TemplateObject *)filter)->literal)); + filter_type = LITERAL; } } @@ -1462,6 +1465,25 @@ _sre_SRE_Pattern_subn_impl(PatternObject *self, PyTypeObject *cls, return pattern_subx(module_state, self, repl, string, count, 1); } +/*[clinic input] +_sre.SRE_Pattern.compile_template + + cls: defining_class + repl: object + / + +Compile a replacement string into a template object. 
+[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Pattern_compile_template_impl(PatternObject *self, + PyTypeObject *cls, PyObject *repl) +/*[clinic end generated code: output=1844907c9fead66a input=1dc068ecb1a70712]*/ +{ + _sremodulestate *module_state = get_sre_module_state_by_class(cls); + return compile_template(module_state, self, repl); +} + /*[clinic input] _sre.SRE_Pattern.__copy__ @@ -2984,6 +3006,8 @@ pattern_scanner(_sremodulestate *module_state, /* -------------------------------------------------------------------- */ /* template methods */ +PyDoc_STRVAR(template_doc, "Compiled replacement string."); + static int template_traverse(PyObject *op, visitproc visit, void *arg) { @@ -3083,6 +3107,23 @@ expand_template(TemplateObject *self, MatchObject *match) return result; } +static PyObject * +template_call(TemplateObject *self, PyObject *args, PyObject *kwargs) +{ + MatchObject *match; + if (!_PyArg_NoKeywords("template", kwargs)) + return NULL; + + _sremodulestate *state = PyType_GetModuleState(Py_TYPE(self)); + if (state == NULL) { + return NULL; + } + if (!PyArg_ParseTuple(args, "O!", state->Match_Type, &match)) + return NULL; + + return expand_template(self, match); +} + static Py_hash_t pattern_hash(PyObject *op) @@ -3170,6 +3211,7 @@ static PyMethodDef pattern_methods[] = { _SRE_SRE_PATTERN_SPLIT_METHODDEF _SRE_SRE_PATTERN_FINDITER_METHODDEF _SRE_SRE_PATTERN_SCANNER_METHODDEF + _SRE_SRE_PATTERN_COMPILE_TEMPLATE_METHODDEF _SRE_SRE_PATTERN___COPY___METHODDEF _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF _SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF @@ -3316,15 +3358,24 @@ static PyType_Spec scanner_spec = { .slots = scanner_slots, }; +static PyMethodDef template_methods[] = { + {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, + PyDoc_STR("See PEP 585")}, + {NULL, NULL} +}; + static PyType_Slot template_slots[] = { {Py_tp_dealloc, template_dealloc}, + {Py_tp_doc, (void *)template_doc}, + {Py_tp_methods, template_methods}, {Py_tp_traverse, 
template_traverse}, {Py_tp_clear, template_clear}, + {Py_tp_call, template_call}, {0, NULL}, }; static PyType_Spec template_spec = { - .name = "_sre.SRE_Template", + .name = "re.Template", .basicsize = sizeof(TemplateObject), .itemsize = sizeof(((TemplateObject *)0)->items[0]), .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE | From d4a69a2ab7d840eb77c623eb74179af67e321b4a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 26 Jun 2025 19:31:37 +0300 Subject: [PATCH 2/3] Fix typos in the docs. --- Doc/library/re.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/re.rst b/Doc/library/re.rst index 7ae1d30568f81b..2fd8dd1338e87a 100644 --- a/Doc/library/re.rst +++ b/Doc/library/re.rst @@ -1353,8 +1353,8 @@ Regular Expression Objects Compile a replacement string into a :ref:`template object <template-objects>`, which can be used for replacing patterns in strings - using functions :re:`re.sub` or :re:`re.subn` or corresponding methods of - the :ref:`pattern object <re-objects>`. + using functions :func:`re.sub` or :func:`re.subn` or corresponding methods + of the :ref:`pattern object <re-objects>`. .. versionadded:: next From 62b26e8f8fec04e02536cbc9a2fbea7ed42ca6dc Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 27 Jun 2025 10:03:35 +0300 Subject: [PATCH 3/3] Try to fix an undefined behavior. 
--- Modules/_sre/sre.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c index 446b2813cf1a31..93cdd6e77e3f7e 100644 --- a/Modules/_sre/sre.c +++ b/Modules/_sre/sre.c @@ -3108,7 +3108,7 @@ expand_template(TemplateObject *self, MatchObject *match) } static PyObject * -template_call(TemplateObject *self, PyObject *args, PyObject *kwargs) +template_call(PyObject *self, PyObject *args, PyObject *kwargs) { MatchObject *match; if (!_PyArg_NoKeywords("template", kwargs)) @@ -3121,7 +3121,7 @@ template_call(TemplateObject *self, PyObject *args, PyObject *kwargs) if (!PyArg_ParseTuple(args, "O!", state->Match_Type, &match)) return NULL; - return expand_template(self, match); + return expand_template((TemplateObject *)self, match); }