Add more tests for group names and refs in RE (GH-91695) · python/cpython@c213ccc · GitHub
[go: up one dir, main page]

Skip to content

Commit c213ccc

Browse files
Add more tests for group names and refs in RE (GH-91695)
(cherry picked from commit 7407008)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
1 parent b865a66 commit c213ccc

File tree

1 file changed

+41
-15
lines changed

1 file changed

+41
-15
lines changed

Lib/test/test_re.py

Lines changed: 41 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -218,6 +218,16 @@ def test_symbolic_groups(self):
218218
re.compile(r'(?P<a>x)(?P=a)(?(a)y)')
219219
re.compile(r'(?P<a1>x)(?P=a1)(?(a1)y)')
220220
re.compile(r'(?P<a1>x)\1(?(1)y)')
221+
re.compile(b'(?P<a1>x)(?P=a1)(?(a1)y)')
222+
# New valid identifiers in Python 3
223+
re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
224+
re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
225+
# Support > 100 groups.
226+
pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
227+
pat = '(?:%s)(?(200)z|t)' % pat
228+
self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
229+
230+
def test_symbolic_groups_errors(self):
221231
self.checkPatternError(r'(?P<a>)(?P<a>)',
222232
"redefinition of group name 'a' as group 2; "
223233
"was group 1")
@@ -243,16 +253,22 @@ def test_symbolic_groups(self):
243253
self.checkPatternError(r'(?(-1))', "bad character in group name '-1'", 3)
244254
self.checkPatternError(r'(?(1a))', "bad character in group name '1a'", 3)
245255
self.checkPatternError(r'(?(a.))', "bad character in group name 'a.'", 3)
246-
# New valid/invalid identifiers in Python 3
247-
re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
248-
re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
249256
self.checkPatternError('(?P<©>x)', "bad character in group name '©'", 4)
257+
self.checkPatternError('(?P=©)', "bad character in group name '©'", 4)
258+
self.checkPatternError('(?(©)y)', "bad character in group name '©'", 3)
259+
260+
def test_symbolic_refs(self):
261+
self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')
262+
self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '')
263+
self.assertEqual(re.sub(b'(?P<a1>x)', br'\g<a1>', b'xx'), b'xx')
264+
# New valid identifiers in Python 3
265+
self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
266+
self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
250267
# Support > 100 groups.
251268
pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
252-
pat = '(?:%s)(?(200)z|t)' % pat
253-
self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
269+
self.assertEqual(re.sub(pat, r'\g<200>', 'xc8yzxc8y'), 'c8zc8')
254270

255-
def test_symbolic_refs(self):
271+
def test_symbolic_refs_errors(self):
256272
self.checkTemplateError('(?P<a>x)', r'\g<a', 'xx',
257273
'missing >, unterminated name', 3)
258274
self.checkTemplateError('(?P<a>x)', r'\g<', 'xx',
@@ -270,18 +286,14 @@ def test_symbolic_refs(self):
270286
'invalid group reference 2', 1)
271287
with self.assertRaisesRegex(IndexError, "unknown group name 'ab'"):
272288
re.sub('(?P<a>x)', r'\g<ab>', 'xx')
273-
self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')
274-
self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '')
275289
self.checkTemplateError('(?P<a>x)', r'\g<-1>', 'xx',
276290
"bad character in group name '-1'", 3)
277-
# New valid/invalid identifiers in Python 3
278-
self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
279-
self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
280291
self.checkTemplateError('(?P<a>x)', r'\g<©>', 'xx',
281292
"bad character in group name '©'", 3)
282-
# Support > 100 groups.
283-
pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
284-
self.assertEqual(re.sub(pat, r'\g<200>', 'xc8yzxc8y'), 'c8zc8')
293+
self.checkTemplateError('(?P<a>x)', r'\g<㊀>', 'xx',
294+
"bad character in group name '㊀'", 3)
295+
self.checkTemplateError('(?P<a>x)', r'\g<¹>', 'xx',
296+
"bad character in group name '¹'", 3)
285297

286298
def test_re_subn(self):
287299
self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
@@ -543,9 +555,23 @@ def test_re_groupref_exists(self):
543555
pat = '(?:%s)(?(200)z)' % pat
544556
self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
545557

546-
self.checkPatternError(r'(?P<a>)(?(0))', 'bad group number', 10)
558+
def test_re_groupref_exists_errors(self):
559+
self.checkPatternError(r'(?P<a>)(?(0)a|b)', 'bad group number', 10)
560+
self.checkPatternError(r'()(?(-1)a|b)',
561+
"bad character in group name '-1'", 5)
562+
self.checkPatternError(r'()(?(㊀)a|b)',
563+
"bad character in group name '㊀'", 5)
564+
self.checkPatternError(r'()(?(¹)a|b)',
565+
"bad character in group name '¹'", 5)
566+
self.checkPatternError(r'()(?(1',
567+
"missing ), unterminated name", 5)
568+
self.checkPatternError(r'()(?(1)a',
569+
"missing ), unterminated subpattern", 2)
547570
self.checkPatternError(r'()(?(1)a|b',
548571
'missing ), unterminated subpattern', 2)
572+
self.checkPatternError(r'()(?(1)a|b|c',
573+
'conditional backref with more than '
574+
'two branches', 10)
549575
self.checkPatternError(r'()(?(1)a|b|c)',
550576
'conditional backref with more than '
551577
'two branches', 10)

0 commit comments

Comments
 (0)
0