8000 gh-98740: Fix validation of conditional expressions in RE by serhiy-storchaka · Pull Request #98764 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

gh-98740: Fix validation of conditional expressions in RE #98764

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Doc/library/re.rst
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,9 @@ The special characters are:
some fixed length. Patterns which start with negative lookbehind assertions may
match at the beginning of the string being searched.

.. _re-conditional-expression:
.. index:: single: (?(; in regular expressions

``(?(id/name)yes-pattern|no-pattern)``
Will try to match with ``yes-pattern`` if the group with given *id* or
*name* exists, and with ``no-pattern`` if it doesn't. ``no-pattern`` is
Expand Down
5 changes: 5 additions & 0 deletions Lib/test/test_re.py
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,11 @@ def test_re_groupref_exists_errors(self):
self.checkPatternError(r'()(?(2)a)',
"invalid group reference 2", 5)

def test_re_groupref_exists_validation_bug(self):
for i in range(256):
with self.subTest(code=i):
re.compile(r'()(?(1)\x%02x?)' % i)

def test_re_groupref_overflow(self):
from re._constants import MAXGROUPS
self.checkTemplateError('()', r'\g<%s>' % MAXGROUPS, 'xx',
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix internal error in the :mod:`re` module which in very rare circumstances
prevented compilation of a regular expression containing a :ref:`conditional
expression <re-conditional-expression>` without the "else" branch.
56 changes: 29 additions & 27 deletions Modules/_sre/sre.c
Original file line number Diff line number Diff line change
Expand Up @@ -1623,7 +1623,7 @@ _sre_template_impl(PyObject *module, PyObject *pattern, PyObject *template)
#endif

/* Report failure */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return -1; } while (0)

/* Extract opcode, argument, or skip count from code array */
#define GET_OP \
Expand All @@ -1647,7 +1647,7 @@ _sre_template_impl(PyObject *module, PyObject *pattern, PyObject *template)
skip = *code; \
VTRACE(("%lu (skip to %p)\n", \
(unsigned long)skip, code+skip)); \
if (skip-adj > (uintptr_t)(end - code)) \
if (skip-adj > (uintptr_t)(end - code)) \
FAIL; \
code++; \
} while (0)
Expand Down Expand Up @@ -1736,9 +1736,10 @@ _validate_charset(SRE_CODE *code, SRE_CODE *end)
}
}

return 1;
return 0;
}

/* Returns 0 on success, -1 on failure, and 1 if the last op is JUMP. */
static int
_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
{
Expand Down Expand Up @@ -1816,7 +1817,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
case SRE_OP_IN_LOC_IGNORE:
GET_SKIP;
/* Stop 1 before the end; we check the FAILURE below */
if (!_validate_charset(code, code+skip-2))
if (_validate_charset(code, code+skip-2))
FAIL;
if (code[skip-2] != SRE_OP_FAILURE)
FAIL;
Expand Down Expand Up @@ -1870,7 +1871,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
}
/* Validate the charset */
if (flags & SRE_INFO_CHARSET) {
if (!_validate_charset(code, newcode-1))
if (_validate_charset(code, newcode-1))
FAIL;
if (newcode[-1] != SRE_OP_FAILURE)
FAIL;
Expand All @@ -1891,7 +1892,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
if (skip == 0)
break;
/* Stop 2 before the end; we check the JUMP below */
if (!_validate_inner(code, code+skip-3, groups))
if (_validate_inner(code, code+skip-3, groups))
FAIL;
code += skip-3;
/* Check that it ends with a JUMP, and that each JUMP
Expand All @@ -1905,6 +1906,8 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
else if (code+skip-1 != target)
FAIL;
}
if (code != target)
FAIL;
}
break;

Expand All @@ -1920,7 +1923,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
FAIL;
if (max > SRE_MAXREPEAT)
FAIL;
if (!_validate_inner(code, code+skip-4, groups))
if (_validate_inner(code, code+skip-4, groups))
FAIL;
code += skip-4;
GET_OP;
Expand All @@ -1940,7 +1943,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
FAIL;
if (max > SRE_MAXREPEAT)
FAIL;
if (!_validate_inner(code, code+skip-3, groups))
if (_validate_inner(code, code+skip-3, groups))
FAIL;
code += skip-3;
GET_OP;
Expand All @@ -1958,7 +1961,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
case SRE_OP_ATOMIC_GROUP:
{
GET_SKIP;
if (!_validate_inner(code, code+skip-2, groups))
if (_validate_inner(code, code+skip-2, groups))
FAIL;
code += skip-2;
GET_OP;
Expand Down Expand Up @@ -2010,24 +2013,17 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
to allow arbitrary jumps anywhere in the code; so we just look
for a JUMP opcode preceding our skip target.
*/
if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
code[skip-3] == SRE_OP_JUMP)
{
VTRACE(("both then and else parts present\n"));
if (!_validate_inner(code+1, code+skip-3, groups))
FAIL;
VTRACE(("then part:\n"));
int rc = _validate_inner(code+1, code+skip-1, groups);
if (rc == 1) {
VTRACE(("else part:\n"));
code += skip-2; /* Position after JUMP, at <skipno> */
GET_SKIP;
if (!_validate_inner(code, code+skip-1, groups))
FAIL;
code += skip-1;
}
else {
VTRACE(("only a then part present\n"));
if (!_validate_inner(code+1, code+skip-1, groups))
FAIL;
code += skip-1;
rc = _validate_inner(code, code+skip-1, groups);
}
if (rc)
FAIL;
code += skip-1;
break;

case SRE_OP_ASSERT:
Expand All @@ -2038,22 +2034,28 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
if (arg & 0x80000000)
FAIL; /* Width too large */
/* Stop 1 before the end; we check the SUCCESS below */
if (!_validate_inner(code+1, code+skip-2, groups))
if (_validate_inner(code+1, code+skip-2, groups))
FAIL;
code += skip-2;
GET_OP;
if (op != SRE_OP_SUCCESS)
FAIL;
break;

case SRE_OP_JUMP:
if (code + 1 != end)
FAIL;
VTRACE(("JUMP: %d\n", __LINE__));
return 1;

default:
FAIL;

}
}

VTRACE(("okay\n"));
return 1;
return 0;
}

static int
Expand All @@ -2068,7 +2070,7 @@ _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
static int
_validate(PatternObject *self)
{
if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
if (_validate_outer(self->code, self->code+self->codesize, self->groups))
{
PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
return 0;
Expand Down
0