gh-98740: Fix validation of conditional expressions in RE (GH-98764) · python/cpython@eb023a8 · GitHub
[go: up one dir, main page]

Skip to content

Commit eb023a8

Browse files
gh-98740: Fix validation of conditional expressions in RE (GH-98764)
In very rare circumstances, the argument of an opcode in a "then" part that does not end with a JUMP opcode could be mistaken for the JUMP opcode itself. This led to incorrect detection of the final JUMP opcode and incorrect calculation of the size of the subexpression.

NOTE: Changed the return value of the functions _validate_inner() and _validate_charset() in Modules/_sre/sre.c. They now return 0 on success, -1 on failure, and 1 if the last op is JUMP (which is usually a failure). Previously they returned 1 on success and 0 on failure.

(cherry picked from commit e9ac890)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
1 parent a02979f commit eb023a8

File tree

4 files changed

+40
-27
lines changed

4 files changed

+40
-27
lines changed

Doc/library/re.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,9 @@ The special characters are:
481481
some fixed length. Patterns which start with negative lookbehind assertions may
482482
match at the beginning of the string being searched.
483483

484+
.. _re-conditional-expression:
485+
.. index:: single: (?(; in regular expressions
486+
484487
``(?(id/name)yes-pattern|no-pattern)``
485488
Will try to match with ``yes-pattern`` if the group with given *id* or
486489
*name* exists, and with ``no-pattern`` if it doesn't. ``no-pattern`` is

Lib/test/test_re.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -661,6 +661,11 @@ def test_re_groupref_exists_errors(self):
661661
self.checkPatternError(r'()(?(2)a)',
662662
"invalid group reference 2", 5)
663663

664+
def test_re_groupref_exists_validation_bug(self):
665+
for i in range(256):
666+
with self.subTest(code=i):
667+
re.compile(r'()(?(1)\x%02x?)' % i)
668+
664669
def test_re_groupref_overflow(self):
665670
from re._constants import MAXGROUPS
666671
self.checkTemplateError('()', r'\g<%s>' % MAXGROUPS, 'xx',
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix internal error in the :mod:`re` module which in very rare circumstances
2+
prevented compilation of a regular expression containing a :ref:`conditional
3+
expression <re-conditional-expression>` without the "else" branch.

Modules/_sre/sre.c

Lines changed: 29 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1528,7 +1528,7 @@ _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
15281528
#endif
15291529

15301530
/* Report failure */
1531-
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)
1531+
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return -1; } while (0)
15321532

15331533
/* Extract opcode, argument, or skip count from code array */
15341534
#define GET_OP \
@@ -1552,7 +1552,7 @@ _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
15521552
skip = *code; \
15531553
VTRACE(("%lu (skip to %p)\n", \
15541554
(unsigned long)skip, code+skip)); \
1555-
if (skip-adj > (uintptr_t)(end - code)) \
1555+
if (skip-adj > (uintptr_t)(end - code)) \
15561556
FAIL; \
15571557
code++; \
15581558
} while (0)
@@ -1641,9 +1641,10 @@ _validate_charset(SRE_CODE *code, SRE_CODE *end)
16411641
}
16421642
}
16431643

1644-
return 1;
1644+
return 0;
16451645
}
16461646

1647+
/* Returns 0 on success, -1 on failure, and 1 if the last op is JUMP. */
16471648
static int
16481649
_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
16491650
{
@@ -1721,7 +1722,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
17211722
case SRE_OP_IN_LOC_IGNORE:
17221723
GET_SKIP;
17231724
/* Stop 1 before the end; we check the FAILURE below */
1724-
if (!_validate_charset(code, code+skip-2))
1725+
if (_validate_charset(code, code+skip-2))
17251726
FAIL;
17261727
if (code[skip-2] != SRE_OP_FAILURE)
17271728
FAIL;
@@ -1775,7 +1776,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
17751776
}
17761777
/* Validate the charset */
17771778
if (flags & SRE_INFO_CHARSET) {
1778-
if (!_validate_charset(code, newcode-1))
1779+
if (_validate_charset(code, newcode-1))
17791780
FAIL;
17801781
if (newcode[-1] != SRE_OP_FAILURE)
17811782
FAIL;
@@ -1796,7 +1797,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
17961797
if (skip == 0)
17971798
break;
17981799
/* Stop 2 before the end; we check the JUMP below */
1799-
if (!_validate_inner(code, code+skip-3, groups))
1800+
if (_validate_inner(code, code+skip-3, groups))
18001801
FAIL;
18011802
code += skip-3;
18021803
/* Check that it ends with a JUMP, and that each JUMP
@@ -1810,6 +1811,8 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
18101811
else if (code+skip-1 != target)
18111812
FAIL;
18121813
}
1814+
if (code != target)
1815+
FAIL;
18131816
}
18141817
break;
18151818

@@ -1825,7 +1828,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
18251828
FAIL;
18261829
if (max > SRE_MAXREPEAT)
18271830
FAIL;
1828-
if (!_validate_inner(code, code+skip-4, groups))
1831+
if (_validate_inner(code, code+skip-4, groups))
18291832
FAIL;
18301833
code += skip-4;
18311834
GET_OP;
@@ -1845,7 +1848,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
18451848
FAIL;
18461849
if (max > SRE_MAXREPEAT)
18471850
FAIL;
1848-
if (!_validate_inner(code, code+skip-3, groups))
1851+
if (_validate_inner(code, code+skip-3, groups))
18491852
FAIL;
18501853
code += skip-3;
18511854
GET_OP;
@@ -1863,7 +1866,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
18631866
case SRE_OP_ATOMIC_GROUP:
18641867
{
18651868
GET_SKIP;
1866-
if (!_validate_inner(code, code+skip-2, groups))
1869+
if (_validate_inner(code, code+skip-2, groups))
18671870
FAIL;
18681871
code += skip-2;
18691872
GET_OP;
@@ -1915,24 +1918,17 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
19151918
to allow arbitrary jumps anywhere in the code; so we just look
19161919
for a JUMP opcode preceding our skip target.
19171920
*/
1918-
if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
1919-
code[skip-3] == SRE_OP_JUMP)
1920-
{
1921-
VTRACE(("both then and else parts present\n"));
1922-
if (!_validate_inner(code+1, code+skip-3, groups))
1923-
FAIL;
1921+
VTRACE(("then part:\n"));
1922+
int rc = _validate_inner(code+1, code+skip-1, groups);
1923+
if (rc == 1) {
1924+
VTRACE(("else part:\n"));
19241925
code += skip-2; /* Position after JUMP, at <skipno> */
19251926
GET_SKIP;
1926-
if (!_validate_inner(code, code+skip-1, groups))
1927-
FAIL;
1928-
code += skip-1;
1929-
}
1930-
else {
1931-
VTRACE(("only a then part present\n"));
1932-
if (!_validate_inner(code+1, code+skip-1, groups))
1933-
FAIL;
1934-
code += skip-1;
1927+
rc = _validate_inner(code, code+skip-1, groups);
19351928
}
1929+
if (rc)
1930+
FAIL;
1931+
code += skip-1;
19361932
break;
19371933

19381934
case SRE_OP_ASSERT:
@@ -1943,22 +1939,28 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
19431939
if (arg & 0x80000000)
19441940
FAIL; /* Width too large */
19451941
/* Stop 1 before the end; we check the SUCCESS below */
1946-
if (!_validate_inner(code+1, code+skip-2, groups))
1942+
if (_validate_inner(code+1, code+skip-2, groups))
19471943
FAIL;
19481944
code += skip-2;
19491945
GET_OP;
19501946
if (op != SRE_OP_SUCCESS)
19511947
FAIL;
19521948
break;
19531949

1950+
case SRE_OP_JUMP:
1951+
if (code + 1 != end)
1952+
FAIL;
1953+
VTRACE(("JUMP: %d\n", __LINE__));
1954+
return 1;
1955+
19541956
default:
19551957
FAIL;
19561958

19571959
}
19581960
}
19591961

19601962
VTRACE(("okay\n"));
1961-
return 1;
1963+
return 0;
19621964
}
19631965

19641966
static int
@@ -1973,7 +1975,7 @@ _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
19731975
static int
19741976
_validate(PatternObject *self)
19751977
{
1976-
if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1978+
if (_validate_outer(self->code, self->code+self->codesize, self->groups))
19771979
{
19781980
PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
19791981
return 0;

0 commit comments

Comments
 (0)
0