[3.10] Add more tests for group names and refs in RE (GH-91695) by miss-islington · Pull Request #91698 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

[3.10] Add more tests for group names and refs in RE (GH-91695) #91698

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 19, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
<!-- -->
Diff view
56 changes: 41 additions & 15 deletions Lib/test/test_re.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,16 @@ def test_symbolic_groups(self):
re.compile(r'(?P<a>x)(?P=a)(?(a)y)')
re.compile(r'(?P<a1>x)(?P=a1)(?(a1)y)')
re.compile(r'(?P<a1>x)\1(?(1)y)')
re.compile(b'(?P<a1>x)(?P=a1)(?(a1)y)')
# New valid identifiers in Python 3
re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
# Support > 100 groups.
pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
pat = '(?:%s)(?(200)z|t)' % pat
self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))

def test_symbolic_groups_errors(self):
self.checkPatternError(r'(?P<a>)(?P<a>)',
"redefinition of group name 'a' as group 2; "
"was group 1")
Expand All @@ -243,16 +253,22 @@ def test_symbolic_groups(self):
self.checkPatternError(r'(?(-1))', "bad character in group name '-1'", 3)
self.checkPatternError(r'(?(1a))', "bad character in group name '1a'", 3)
self.checkPatternError(r'(?(a.))', "bad character in group name 'a.'", 3)
# New valid/invalid identifiers in Python 3
re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
self.checkPatternError('(?P<©>x)', "bad character in group name '©'", 4)
self.checkPatternError('(?P=©)', "bad character in group name '©'", 4)
self.checkPatternError('(?(©)y)', "bad character in group name '©'", 3)

def test_symbolic_refs(self):
self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')
self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '')
self.assertEqual(re.sub(b'(?P<a1>x)', br'\g<a1>', b'xx'), b'xx')
# New valid identifiers in Python 3
self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
# Support > 100 groups.
pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
pat = '(?:%s)(?(200)z|t)' % pat
self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
self.assertEqual(re.sub(pat, r'\g<200>', 'xc8yzxc8y'), 'c8zc8')

def test_symbolic_refs(self):
def test_symbolic_refs_errors(self):
self.checkTemplateError('(?P<a>x)', r'\g<a', 'xx',
'missing >, unterminated name', 3)
self.checkTemplateError('(?P<a>x)', r'\g<', 'xx',
Expand All @@ -270,18 +286,14 @@ def test_symbolic_refs(self):
'invalid group reference 2', 1)
with self.assertRaisesRegex(IndexError, "unknown group name 'ab'"):
re.sub('(?P<a>x)', r'\g<ab>', 'xx')
self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')
self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '')
self.checkTemplateError('(?P<a>x)', r'\g<-1>', 'xx',
"bad character in group name '-1'", 3)
# New valid/invalid identifiers in Python 3
self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
self.checkTemplateError('(?P<a>x)', r'\g<©>', 'xx',
"bad character in group name '©'", 3)
# Support > 100 groups.
pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
self.assertEqual(re.sub(pat, r'\g<200>', 'xc8yzxc8y'), 'c8zc8')
self.checkTemplateError('(?P<a>x)', r'\g<㊀>', 'xx',
"bad character in group name '㊀'", 3)
self.checkTemplateError('(?P<a>x)', r'\g<¹>', 'xx',
"bad character in group name '¹'", 3)

def test_re_subn(self):
self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
Expand Down Expand Up @@ -543,9 +555,23 @@ def test_re_groupref_exists(self):
pat = '(?:%s)(?(200)z)' % pat
self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))

self.checkPatternError(r'(?P<a>)(?(0))', 'bad group number', 10)
def test_re_groupref_exists_errors(self):
self.checkPatternError(r'(?P<a>)(?(0)a|b)', 'bad group number', 10)
self.checkPatternError(r'()(?(-1)a|b)',
"bad character in group name '-1'", 5)
self.checkPatternError(r'()(?(㊀)a|b)',
"bad character in group name '㊀'", 5)
self.checkPatternError(r'()(?(¹)a|b)',
"bad character in group name '¹'", 5)
self.checkPatternError(r'()(?(1',
"missing ), unterminated name", 5)
self.checkPatternError(r'()(?(1)a',
"missing ), unterminated subpattern", 2)
self.checkPatternError(r'()(?(1)a|b',
'missing ), unterminated subpattern', 2)
self.checkPatternError(r'()(?(1)a|b|c',
'conditional backref with more than '
'two branches', 10)
self.checkPatternError(r'()(?(1)a|b|c)',
'conditional backref with more than '
'two branches', 10)
Expand Down
0