00385: gh-91404: Revert "bpo-23689: re module, fix memory leak..." · fedora-python/cpython@f35fa1a · GitHub
[go: up one dir, main page]

Skip to content

Commit f35fa1a

Browse files
00385: pythongh-91404: Revert "bpo-23689: re module, fix memory leak..."
This fixes a speed regression in the re module which prevented chromium from building in Fedora. Revert "bpo-23689: re module, fix memory leak when a match is terminated by a signal or memory allocation failure" This reverts commit 6e3eee5. Manual fixups to increase the MAGIC number and to handle conflicts with a couple of changes that landed after that. (cherry picked from commit 4beee0c) Co-authored-by: Gregory P. Smith <greg@krypto.org> pythongh-94675: Add a regression test for rjsmin re slowdown Co-authored-by: Miro Hrončok <miro@hroncok.cz>
1 parent a3b9665 commit f35fa1a

File tree

10 files changed

+105
-147
lines changed

10 files changed

+105
-147
lines changed

Lib/re/_compiler.py

Lines changed: 21 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -28,21 +28,14 @@
2828
POSSESSIVE_REPEAT: (POSSESSIVE_REPEAT, SUCCESS, POSSESSIVE_REPEAT_ONE),
2929
}
3030

31-
class _CompileData:
32-
__slots__ = ('code', 'repeat_count')
33-
def __init__(self):
34-
self.code = []
35-
self.repeat_count = 0
36-
3731
def _combine_flags(flags, add_flags, del_flags,
3832
TYPE_FLAGS=_parser.TYPE_FLAGS):
3933
if add_flags & TYPE_FLAGS:
4034
flags &= ~TYPE_FLAGS
4135
return (flags | add_flags) & ~del_flags
4236

43-
def _compile(data, pattern, flags):
37+
def _compile(code, pattern, flags):
4438
# internal: compile a (sub)pattern
45-
code = data.code
4639
emit = code.append
4740
_len = len
4841
LITERAL_CODES = _LITERAL_CODES
@@ -115,19 +108,15 @@ def _compile(data, pattern, flags):
115108
skip = _len(code); emit(0)
116109
emit(av[0])
117110
emit(av[1])
118-
_compile(data, av[2], flags)
111+
_compile(code, av[2], flags)
119112
emit(SUCCESS)
120113
code[skip] = _len(code) - skip
121114
else:
122115
emit(REPEATING_CODES[op][0])
123116
skip = _len(code); emit(0)
124117
emit(av[0])
125118
emit(av[1])
126-
# now op is in (MIN_REPEAT, MAX_REPEAT, POSSESSIVE_REPEAT)
127-
if op != POSSESSIVE_REPEAT:
128-
emit(data.repeat_count)
129-
data.repeat_count += 1
130-
_compile(data, av[2], flags)
119+
_compile(code, av[2], flags)
131120
code[skip] = _len(code) - skip
132121
emit(REPEATING_CODES[op][1])
133122
elif op is SUBPATTERN:
@@ -136,7 +125,7 @@ def _compile(data, pattern, flags):
136125
emit(MARK)
137126
emit((group-1)*2)
138127
# _compile_info(code, p, _combine_flags(flags, add_flags, del_flags))
139-
_compile(data, p, _combine_flags(flags, add_flags, del_flags))
128+
_compile(code, p, _combine_flags(flags, add_flags, del_flags))
140129
if group:
141130
emit(MARK)
142131
emit((group-1)*2+1)
@@ -148,7 +137,7 @@ def _compile(data, pattern, flags):
148137
# pop their stack if they reach it
149138
emit(ATOMIC_GROUP)
150139
skip = _len(code); emit(0)
151-
_compile(data, av, flags)
140+
_compile(code, av, flags)
152141
emit(SUCCESS)
153142
code[skip] = _len(code) - skip
154143
elif op in SUCCESS_CODES:
@@ -163,7 +152,7 @@ def _compile(data, pattern, flags):
163152
if lo != hi:
164153
raise error("look-behind requires fixed-width pattern")
165154
emit(lo) # look behind
166-
_compile(data, av[1], flags)
155+
_compile(code, av[1], flags)
167156
emit(SUCCESS)
168157
code[skip] = _len(code) - skip
169158
elif op is AT:
@@ -182,7 +171,7 @@ def _compile(data, pattern, flags):
182171
for av in av[1]:
183172
skip = _len(code); emit(0)
184173
# _compile_info(code, av, flags)
185-
_compile(data, av, flags)
174+
_compile(code, av, flags)
186175
emit(JUMP)
187176
tailappend(_len(code)); emit(0)
188177
code[skip] = _len(code) - skip
@@ -210,12 +199,12 @@ def _compile(data, pattern, flags):
210199
emit(op)
211200
emit(av[0]-1)
212201
skipyes = _len(code); emit(0)
213-
_compile(data, av[1], flags)
202+
_compile(code, av[1], flags)
214203
if av[2]:
215204
emit(JUMP)
216205
skipno = _len(code); emit(0)
217206
code[skipyes] = _len(code) - skipyes + 1
218-
_compile(data, av[2], flags)
207+
_compile(code, av[2], flags)
219208
code[skipno] = _len(code) - skipno
220209
else:
221210
code[skipyes] = _len(code) - skipyes + 1
@@ -582,17 +571,17 @@ def isstring(obj):
582571
def _code(p, flags):
583572

584573
flags = p.state.flags | flags
585-
data = _CompileData()
574+
code = []
586575

587576
# compile info block
588-
_compile_info(data.code, p, flags)
577+
_compile_info(code, p, flags)
589578

590579
# compile the pattern
591-
_compile(data, p.data, flags)
580+
_compile(code, p.data, flags)
592581

593-
data.code.append(SUCCESS)
582+
code.append(SUCCESS)
594583

595-
return data
584+
return code
596585

597586
def _hex_code(code):
598587
return '[%s]' % ', '.join('%#0*x' % (_sre.CODESIZE*2+2, x) for x in code)
@@ -693,21 +682,14 @@ def print_2(*args):
693682
else:
694683
print_(FAILURE)
695684
i += 1
696-
elif op in (REPEAT_ONE, MIN_REPEAT_ONE,
685+
elif op in (REPEAT, REPEAT_ONE, MIN_REPEAT_ONE,
697686
POSSESSIVE_REPEAT, POSSESSIVE_REPEAT_ONE):
698687
skip, min, max = code[i: i+3]
699688
if max == MAXREPEAT:
700689
max = 'MAXREPEAT'
701690
print_(op, skip, min, max, to=i+skip)
702691
dis_(i+3, i+skip)
703692
i += skip
704-
elif op is REPEAT:
705-
skip, min, max, repeat_index = code[i: i+4]
706-
if max == MAXREPEAT:
707-
max = 'MAXREPEAT'
708-
print_(op, skip, min, max, repeat_index, to=i+skip)
709-
dis_(i+4, i+skip)
710-
i += skip
711693
elif op is GROUPREF_EXISTS:
712694
arg, skip = code[i: i+2]
713695
print_(op, arg, skip, to=i+skip)
@@ -762,11 +744,11 @@ def compile(p, flags=0):
762744
else:
763745
pattern = None
764746

765-
data = _code(p, flags)
747+
code = _code(p, flags)
766748

767749
if flags & SRE_FLAG_DEBUG:
768750
print()
769-
dis(data.code)
751+
dis(code)
770752

771753
# map in either direction
772754
groupindex = p.state.groupdict
@@ -775,6 +757,7 @@ def compile(p, flags=0):
775757
indexgroup[i] = k
776758

777759
return _sre.compile(
778-
pattern, flags | p.state.flags, data.code,
779-
p.state.groups-1, groupindex, tuple(indexgroup),
780-
data.repeat_count)
760+
pattern, flags | p.state.flags, code,
761+
p.state.groups-1,
762+
groupindex, tuple(indexgroup)
763+
)

Lib/re/_constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
# update when constants are added or removed
1515

16-
MAGIC = 20220423
16+
MAGIC = 20220615
1717

1818
from _sre import MAXREPEAT, MAXGROUPS
1919

Lib/test/test_re.py

Lines changed: 32 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from test.support import (gc_collect, bigmemtest, _2G,
22
cpython_only, captured_stdout,
3-
check_disallow_instantiation, is_emscripten, is_wasi)
3+
check_disallow_instantiation, is_emscripten, is_wasi,
4+
SHORT_TIMEOUT)
45
import locale
56
import re
67
import string
@@ -11,6 +12,14 @@
1112
from re import Scanner
1213
from weakref import proxy
1314

15+
# some platforms lack working multiprocessing
16+
try:
17+
import _multiprocessing
18+
except ImportError:
19+
multiprocessing = None
20+
else:
21+
import multiprocessing
22+
1423
# Misc tests from Tim Peters' re.doc
1524

1625
# WARNING: Don't change details in these tests if you don't know
@@ -1796,12 +1805,9 @@ def test_dealloc(self):
17961805
long_overflow = 2**128
17971806
self.assertRaises(TypeError, re.finditer, "a", {})
17981807
with self.assertRaises(OverflowError):
1799-
_sre.compile("abc", 0, [long_overflow], 0, {}, (), 0)
1808+
_sre.compile("abc", 0, [long_overflow], 0, {}, ())
18001809
with self.assertRaises(TypeError):
1801-
_sre.compile({}, 0, [], 0, [], [], 0)
1802-
with self.assertRaises(RuntimeError):
1803-
# invalid repeat_count -1
1804-
_sre.compile("abc", 0, [1], 0, {}, (), -1)
1810+
_sre.compile({}, 0, [], 0, [], [])
18051811

18061812
def test_search_dot_unicode(self):
18071813
self.assertTrue(re.search("123.*-", '123abc-'))
@@ -2441,6 +2447,26 @@ def test_template_function_and_flag_is_deprecated(self):
24412447
self.assertTrue(template_re1.match('ahoy'))
24422448
self.assertFalse(template_re1.match('nope'))
24432449

2450+
@unittest.skipIf(multiprocessing is None, 'test requires multiprocessing')
2451+
def test_regression_gh94675(self):
2452+
pattern = re.compile(r'(?<=[({}])(((//[^\n]*)?[\n])([\000-\040])*)*'
2453+
r'((/[^/\[\n]*(([^\n]|(\[\n]*(]*)*\]))'
2454+
r'[^/\[]*)*/))((((//[^\n]*)?[\n])'
2455+
r'([\000-\040]|(/\*[^*]*\*+'
2456+
r'([^/*]\*+)*/))*)+(?=[^\000-\040);\]}]))')
2457+
input_js = '''a(function() {
2458+
///////////////////////////////////////////////////////////////////
2459+
});'''
2460+
p = multiprocessing.Process(target=pattern.sub, args=('', input_js))
2461+
p.start()
2462+
p.join(SHORT_TIMEOUT)
2463+
try:
2464+
self.assertFalse(p.is_alive(), 'pattern.sub() timed out')
2465+
finally:
2466+
if p.is_alive():
2467+
p.terminate()
2468+
p.join()
2469+
24442470

24452471
def get_debug_out(pat):
24462472
with captured_stdout() as out:
@@ -2540,27 +2566,6 @@ def test_possesive_repeat(self):
25402566
14. SUCCESS
25412567
''')
25422568

2543-
def test_repeat_index(self):
2544-
self.assertEqual(get_debug_out(r'(?:ab)*?(?:cd)*'), '''\
2545-
MIN_REPEAT 0 MAXREPEAT
2546-
LITERAL 97
2547-
LITERAL 98
2548-
MAX_REPEAT 0 MAXREPEAT
2549-
LITERAL 99
2550-
LITERAL 100
2551-
2552-
0. INFO 4 0b0 0 MAXREPEAT (to 5)
2553-
5: REPEAT 8 0 MAXREPEAT 0 (to 14)
2554-
10. LITERAL 0x61 ('a')
2555-
12. LITERAL 0x62 ('b')
2556-
14: MIN_UNTIL
2557-
15. REPEAT 8 0 MAXREPEAT 1 (to 24)
2558-
20. LITERAL 0x63 ('c')
2559-
22. LITERAL 0x64 ('d')
2560-
24: MAX_UNTIL
2561-
25. SUCCESS
2562-
''')
2563-
25642569

25652570
class PatternReprTests(unittest.TestCase):
25662571
def check(self, pattern, expected):
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Revert the fix for the :mod:`re` memory leak when a match is terminated by a signal or
2+
memory allocation failure as the implemented fix caused a major performance
3+
regression.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add a regression test for :mod:`re` exponential slowdown when using rjsmin.

Modules/_sre/clinic/sre.c.h

Lines changed: 7 additions & 20 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)
0