gh-91616: re module, fix .fullmatch() mismatch when using Atomic Grouping or Possessive Quantifiers · python/cpython@e4e8895 · GitHub
[go: up one dir, main page]

Skip to content

Commit e4e8895

Browse files
author
Ma Lin
authored
gh-91616: re module, fix .fullmatch() mismatch when using Atomic Grouping or Possessive Quantifiers (GH-91681)
These jumps should use DO_JUMP0() instead of DO_JUMP():
- JUMP_POSS_REPEAT_1
- JUMP_POSS_REPEAT_2
- JUMP_ATOMIC_GROUP
1 parent 061a8bf commit e4e8895

File tree

4 files changed

+37
-8
lines changed

4 files changed

+37
-8
lines changed

Lib/test/test_pathlib.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1388,6 +1388,7 @@ class _BasePathTest(object):
13881388
# | |-- dirD
13891389
# | | `-- fileD
13901390
# | `-- fileC
1391+
# | `-- novel.txt
13911392
# |-- dirE # No permissions
13921393
# |-- fileA
13931394
# |-- linkA -> fileA
@@ -1412,6 +1413,8 @@ def cleanup():
14121413
f.write(b"this is file B\n")
14131414
with open(join('dirC', 'fileC'), 'wb') as f:
14141415
f.write(b"this is file C\n")
1416+
with open(join('dirC', 'novel.txt'), 'wb') as f:
1417+
f.write(b"this is a novel\n")
14151418
with open(join('dirC', 'dirD', 'fileD'), 'wb') as f:
14161419
f.write(b"this is file D\n")
14171420
os.chmod(join('dirE'), 0)
@@ -1679,6 +1682,9 @@ def _check(glob, expected):
16791682
p = P(BASE, "dirC")
16801683
_check(p.rglob("file*"), ["dirC/fileC", "dirC/dirD/fileD"])
16811684
_check(p.rglob("*/*"), ["dirC/dirD/fileD"])
1685+
# gh-91616, a re module regression
1686+
_check(p.rglob("*.txt"), ["dirC/novel.txt"])
1687+
_check(p.rglob("*.*"), ["dirC/novel.txt"])
16821688

16831689
@os_helper.skip_unless_symlink
16841690
def test_rglob_symlink_loop(self):
@@ -1689,7 +1695,8 @@ def test_rglob_symlink_loop(self):
16891695
expect = {'brokenLink',
16901696
'dirA', 'dirA/linkC',
16911697
'dirB', 'dirB/fileB', 'dirB/linkD',
1692-
'dirC', 'dirC/dirD', 'dirC/dirD/fileD', 'dirC/fileC',
1698+
'dirC', 'dirC/dirD', 'dirC/dirD/fileD',
1699+
'dirC/fileC', 'dirC/novel.txt',
16931700
'dirE',
16941701
'fileA',
16951702
'linkA',

Lib/test/test_re.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2242,6 +2242,10 @@ def test_fullmatch_possessive_quantifiers(self):
22422242
self.assertIsNone(re.fullmatch(r'a*+', 'ab'))
22432243
self.assertIsNone(re.fullmatch(r'a?+', 'ab'))
22442244
self.assertIsNone(re.fullmatch(r'a{1,3}+', 'ab'))
2245+
self.assertTrue(re.fullmatch(r'a++b', 'ab'))
2246+
self.assertTrue(re.fullmatch(r'a*+b', 'ab'))
2247+
self.assertTrue(re.fullmatch(r'a?+b', 'ab'))
2248+
self.assertTrue(re.fullmatch(r'a{1,3}+b', 'ab'))
22452249

22462250
self.assertTrue(re.fullmatch(r'(?:ab)++', 'ab'))
22472251
self.assertTrue(re.fullmatch(r'(?:ab)*+', 'ab'))
@@ -2251,6 +2255,10 @@ def test_fullmatch_possessive_quantifiers(self):
22512255
self.assertIsNone(re.fullmatch(r'(?:ab)*+', 'abc'))
22522256
self.assertIsNone(re.fullmatch(r'(?:ab)?+', 'abc'))
22532257
self.assertIsNone(re.fullmatch(r'(?:ab){1,3}+', 'abc'))
2258+
self.assertTrue(re.fullmatch(r'(?:ab)++c', 'abc'))
2259+
self.assertTrue(re.fullmatch(r'(?:ab)*+c', 'abc'))
2260+
self.assertTrue(re.fullmatch(r'(?:ab)?+c', 'abc'))
2261+
self.assertTrue(re.fullmatch(r'(?:ab){1,3}+c', 'abc'))
22542262

22552263
def test_findall_possessive_quantifiers(self):
22562264
self.assertEqual(re.findall(r'a++', 'aab'), ['aa'])
@@ -2286,6 +2294,10 @@ def test_fullmatch_atomic_grouping(self):
22862294
self.assertIsNone(re.fullmatch(r'(?>a*)', 'ab'))
22872295
self.assertIsNone(re.fullmatch(r'(?>a?)', 'ab'))
22882296
self.assertIsNone(re.fullmatch(r'(?>a{1,3})', 'ab'))
2297+
self.assertTrue(re.fullmatch(r'(?>a+)b', 'ab'))
2298+
self.assertTrue(re.fullmatch(r'(?>a*)b', 'ab'))
2299+
self.assertTrue(re.fullmatch(r'(?>a?)b', 'ab'))
2300+
self.assertTrue(re.fullmatch(r'(?>a{1,3})b', 'ab'))
22892301

22902302
self.assertTrue(re.fullmatch(r'(?>(?:ab)+)', 'ab'))
22912303
self.assertTrue(re.fullmatch(r'(?>(?:ab)*)', 'ab'))
@@ -2295,6 +2307,10 @@ def test_fullmatch_atomic_grouping(self):
22952307
self.assertIsNone(re.fullmatch(r'(?>(?:ab)*)', 'abc'))
22962308
self.assertIsNone(re.fullmatch(r'(?>(?:ab)?)', 'abc'))
22972309
self.assertIsNone(re.fullmatch(r'(?>(?:ab){1,3})', 'abc'))
2310+
self.assertTrue(re.fullmatch(r'(?>(?:ab)+)c', 'abc'))
2311+
self.assertTrue(re.fullmatch(r'(?>(?:ab)*)c', 'abc'))
2312+
self.assertTrue(re.fullmatch(r'(?>(?:ab)?)c', 'abc'))
2313+
self.assertTrue(re.fullmatch(r'(?>(?:ab){1,3})c', 'abc'))
22982314

22992315
def test_findall_atomic_grouping(self):
23002316
self.assertEqual(re.findall(r'(?>a+)', 'aab'), ['aa'])
@@ -2307,6 +2323,10 @@ def test_findall_atomic_grouping(self):
23072323
self.assertEqual(re.findall(r'(?>(?:ab)?)', 'ababc'), ['ab', 'ab', '', ''])
23082324
self.assertEqual(re.findall(r'(?>(?:ab){1,3})', 'ababc'), ['abab'])
23092325

2326+
def test_bug_gh91616(self):
2327+
self.assertTrue(re.fullmatch(r'(?s:(?>.*?\.).*)\Z', "a.txt")) # reproducer
2328+
self.assertTrue(re.fullmatch(r'(?s:(?=(?P<g0>.*?\.))(?P=g0).*)\Z', "a.txt"))
2329+
23102330

23112331
def get_debug_out(pat):
23122332
with captured_stdout() as out:
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
:mod:`re` module, fix :meth:`~re.Pattern.fullmatch` mismatch when using Atomic
2+
Grouping or Possessive Quantifiers.

Modules/_sre/sre_lib.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1259,8 +1259,8 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
12591259
/* Check for minimum required matches. */
12601260
while (ctx->count < (Py_ssize_t)pattern[1]) {
12611261
/* not enough matches */
1262-
DO_JUMP(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1263-
&pattern[3]);
1262+
DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1263+
&pattern[3]);
12641264
if (ret) {
12651265
RETURN_ON_ERROR(ret);
12661266
ctx->count++;
@@ -1306,8 +1306,8 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
13061306

13071307
/* We have not reached the maximum matches, so try to
13081308
match once more. */
1309-
DO_JUMP(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1310-
&pattern[3]);
1309+
DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1310+
&pattern[3]);
13111311

13121312
/* Check to see if the last attempted match
13131313
succeeded. */
@@ -1348,15 +1348,15 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
13481348
TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
13491349

13501350
/* Set the global Input pointer to this context's Input
1351-
pointer */
1351+
pointer */
13521352
state->ptr = ptr;
13531353

13541354
/* Evaluate the Atomic Group in a new context, terminating
13551355
when the end of the group, represented by a SUCCESS op
13561356
code, is reached. */
13571357
/* Group Pattern begins at an offset of 1 code. */
1358-
DO_JUMP(JUMP_ATOMIC_GROUP, jump_atomic_group,
1359-
&pattern[1]);
1358+
DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1359+
&pattern[1]);
13601360

13611361
/* Test Exit Condition */
13621362
RETURN_ON_ERROR(ret);

0 commit comments

Comments
 (0)
0