gh-91616: re module, fix .fullmatch() mismatch when using Atomic Grouping or Possessive Quantifiers · Pull Request #91681 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

gh-91616: re module, fix .fullmatch() mismatch when using Atomic Grouping or Possessive Quantifiers #91681

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from Apr 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion Lib/test/test_pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -1388,6 +1388,7 @@ class _BasePathTest(object):
# | |-- dirD
# | | `-- fileD
# | `-- fileC
# | `-- novel.txt
# |-- dirE # No permissions
# |-- fileA
# |-- linkA -> fileA
Expand All @@ -1412,6 +1413,8 @@ def cleanup():
f.write(b"this is file B\n")
with open(join('dirC', 'fileC'), 'wb') as f:
f.write(b"this is file C\n")
with open(join('dirC', 'novel.txt'), 'wb') as f:
f.write(b"this is a novel\n")
with open(join('dirC', 'dirD', 'fileD'), 'wb') as f:
f.write(b"this is file D\n")
os.chmod(join('dirE'), 0)
Expand Down Expand Up @@ -1679,6 +1682,9 @@ def _check(glob, expected):
p = P(BASE, "dirC")
_check(p.rglob("file*"), ["dirC/fileC", "dirC/dirD/fileD"])
_check(p.rglob("*/*"), ["dirC/dirD/fileD"])
# gh-91616, a re module regression
_check(p.rglob("*.txt"), ["dirC/novel.txt"])
_check(p.rglob("*.*"), ["dirC/novel.txt"])

@os_helper.skip_unless_symlink
def test_rglob_symlink_loop(self):
Expand All @@ -1689,7 +1695,8 @@ def test_rglob_symlink_loop(self):
expect = {'brokenLink',
'dirA', 'dirA/linkC',
'dirB', 'dirB/fileB', 'dirB/linkD',
'dirC', 'dirC/dirD', 'dirC/dirD/fileD', 'dirC/fileC',
'dirC', 'dirC/dirD', 'dirC/dirD/fileD',
'dirC/fileC', 'dirC/novel.txt',
'dirE',
'fileA',
'linkA',
Expand Down
20 changes: 20 additions & 0 deletions Lib/test/test_re.py
Original file line number Diff line number Diff line change
Expand Up @@ -2173,6 +2173,10 @@ def test_fullmatch_possessive_quantifiers(self):
self.assertIsNone(re.fullmatch(r'a*+', 'ab'))
self.assertIsNone(re.fullmatch(r'a?+', 'ab'))
self.assertIsNone(re.fullmatch(r'a{1,3}+', 'ab'))
self.assertTrue(re.fullmatch(r'a++b', 'ab'))
self.assertTrue(re.fullmatch(r'a*+b', 'ab'))
self.assertTrue(re.fullmatch(r'a?+b', 'ab'))
self.assertTrue(re.fullmatch(r'a{1,3}+b', 'ab'))

self.assertTrue(re.fullmatch(r'(?:ab)++', 'ab'))
self.assertTrue(re.fullmatch(r'(?:ab)*+', 'ab'))
Expand All @@ -2182,6 +2186,10 @@ def test_fullmatch_possessive_quantifiers(self):
self.assertIsNone(re.fullmatch(r'(?:ab)*+', 'abc'))
self.assertIsNone(re.fullmatch(r'(?:ab)?+', 'abc'))
self.assertIsNone(re.fullmatch(r'(?:ab){1,3}+', 'abc'))
self.assertTrue(re.fullmatch(r'(?:ab)++c', 'abc'))
self.assertTrue(re.fullmatch(r'(?:ab)*+c', 'abc'))
self.assertTrue(re.fullmatch(r'(?:ab)?+c', 'abc'))
self.assertTrue(re.fullmatch(r'(?:ab){1,3}+c', 'abc'))

def test_findall_possessive_quantifiers(self):
self.assertEqual(re.findall(r'a++', 'aab'), ['aa'])
Expand Down Expand Up @@ -2217,6 +2225,10 @@ def test_fullmatch_atomic_grouping(self):
self.assertIsNone(re.fullmatch(r'(?>a*)', 'ab'))
self.assertIsNone(re.fullmatch(r'(?>a?)', 'ab'))
self.assertIsNone(re.fullmatch(r'(?>a{1,3})', 'ab'))
self.assertTrue(re.fullmatch(r'(?>a+)b', 'ab'))
self.assertTrue(re.fullmatch(r'(?>a*)b', 'ab'))
self.assertTrue(re.fullmatch(r'(?>a?)b', 'ab'))
self.assertTrue(re.fullmatch(r'(?>a{1,3})b', 'ab'))

self.assertTrue(re.fullmatch(r'(?>(?:ab)+)', 'ab'))
self.assertTrue(re.fullmatch(r'(?>(?:ab)*)', 'ab'))
Expand All @@ -2226,6 +2238,10 @@ def test_fullmatch_atomic_grouping(self):
self.assertIsNone(re.fullmatch(r'(?>(?:ab)*)', 'abc'))
self.assertIsNone(re.fullmatch(r'(?>(?:ab)?)', 'abc'))
self.assertIsNone(re.fullmatch(r'(?>(?:ab){1,3})', 'abc'))
self.assertTrue(re.fullmatch(r'(?>(?:ab)+)c', 'abc'))
self.assertTrue(re.fullmatch(r'(?>(?:ab)*)c', 'abc'))
self.assertTrue(re.fullmatch(r'(?>(?:ab)?)c', 'abc'))
self.assertTrue(re.fullmatch(r'(?>(?:ab){1,3})c', 'abc'))

def test_findall_atomic_grouping(self):
self.assertEqual(re.findall(r'(?>a+)', 'aab'), ['aa'])
Expand All @@ -2238,6 +2254,10 @@ def test_findall_atomic_grouping(self):
self.assertEqual(re.findall(r'(?>(?:ab)?)', 'ababc'), ['ab', 'ab', '', ''])
self.assertEqual(re.findall(r'(?>(?:ab){1,3})', 'ababc'), ['abab'])

def test_bug_gh91616(self):
# Regression test for gh-91616: fullmatch() must succeed when an atomic
# group is followed by further pattern that consumes the rest of the string.
self.assertTrue(re.fullmatch(r'(?s:(?>.*?\.).*)\Z', "a.txt")) # reproducer
# Same shape of pattern, but with the atomic group written as a lookahead
# capturing into g0 followed by a backreference to g0.
self.assertTrue(re.fullmatch(r'(?s:(?=(?P<g0>.*?\.))(?P=g0).*)\Z', "a.txt"))


def get_debug_out(pat):
with captured_stdout() as out:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix :meth:`~re.Pattern.fullmatch` in the :mod:`re` module failing to match
when the pattern uses Atomic Grouping or Possessive Quantifiers.
14 changes: 7 additions & 7 deletions Modules/_sre/sre_lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -1254,8 +1254,8 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
/* Check for minimum required matches. */
while (ctx->count < (Py_ssize_t)pattern[1]) {
/* not enough matches */
DO_JUMP(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
&pattern[3]);
DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
&pattern[3]);
if (ret) {
RETURN_ON_ERROR(ret);
ctx->count++;
Expand Down Expand Up @@ -1301,8 +1301,8 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)

/* We have not reached the maximum matches, so try to
match once more. */
DO_JUMP(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
&pattern[3]);
DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
&pattern[3]);

/* Check to see if the last attempted match
succeeded. */
Expand Down Expand Up @@ -1343,15 +1343,15 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));

/* Set the global Input pointer to this context's Input
pointer */
pointer */
state->ptr = ptr;

/* Evaluate the Atomic Group in a new context, terminating
when the end of the group, represented by a SUCCESS op
code, is reached. */
/* Group Pattern begins at an offset of 1 code. */
DO_JUMP(JUMP_ATOMIC_GROUP, jump_atomic_group,
&pattern[1]);
DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
&pattern[1]);

/* Test Exit Condition */
RETURN_ON_ERROR(ret);
Expand Down
0