gh-103285: Rewrite _splitlines_no_ff to improve performance (#103307) · python/cpython@3686013 · GitHub
[go: up one dir, main page]

Skip to content

Commit 3686013

Browse files
gh-103285: Rewrite _splitlines_no_ff to improve performance (#103307)
1 parent f0ed293 commit 3686013

File tree

3 files changed

+20 -18 lines changed

3 files changed

+20
-18
lines changed

Lib/ast.py

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
:license: Python License.
2626
"""
2727
import sys
28+
import re
2829
from _ast import *
2930
from contextlib import contextmanager, nullcontext
3031
from enum import IntEnum, auto, _simple_enum
@@ -305,28 +306,17 @@ def get_docstring(node, clean=True):
305306
return text
306307

307308

308-
def _splitlines_no_ff(source):
309+
_line_pattern = re.compile(r"(.*?(?:\r\n|\n|\r|$))")
310+
def _splitlines_no_ff(source, maxlines=None):
309311
"""Split a string into lines ignoring form feed and other chars.
310312
311313
This mimics how the Python parser splits source code.
312314
"""
313-
idx = 0
314315
lines = []
315-
next_line = ''
316-
while idx < len(source):
317-
c = source[idx]
318-
next_line += c
319-
idx += 1
320-
# Keep \r\n together
321-
if c == '\r' and idx < len(source) and source[idx] == '\n':
322-
next_line += '\n'
323-
idx += 1
324-
if c in '\r\n':
325-
lines.append(next_line)
326-
next_line = ''
327-
328-
if next_line:
329-
lines.append(next_line)
316+
for lineno, match in enumerate(_line_pattern.finditer(source), 1):
317+
if maxlines is not None and lineno > maxlines:
318+
break
319+
lines.append(match[0])
330320
return lines
331321

332322

@@ -360,7 +350,7 @@ def get_source_segment(source, node, *, padded=False):
360350
except AttributeError:
361351
return None
362352

363-
lines = _splitlines_no_ff(source)
353+
lines = _splitlines_no_ff(source, maxlines=end_lineno+1)
364354
if end_lineno == lineno:
365355
return lines[lineno].encode()[col_offset:end_col_offset].decode()
366356

Lib/test/test_ast.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2293,6 +2293,17 @@ class C:
22932293
cdef = ast.parse(s).body[0]
22942294
self.assertEqual(ast.get_source_segment(s, cdef.body[0], padded=True), s_method)
22952295

2296+
def test_source_segment_newlines(self):
2297+
s = 'def f():\n pass\ndef g():\r pass\r\ndef h():\r\n pass\r\n'
2298+
f, g, h = ast.parse(s).body
2299+
self._check_content(s, f, 'def f():\n pass')
2300+
self._check_content(s, g, 'def g():\r pass')
2301+
self._check_content(s, h, 'def h():\r\n pass')
2302+
2303+
s = 'def f():\n a = 1\r b = 2\r\n c = 3\n'
2304+
f = ast.parse(s).body[0]
2305+
self._check_content(s, f, s.rstrip())
2306+
22962307
def test_source_segment_missing_info(self):
22972308
s = 'v = 1\r\nw = 1\nx = 1\n\ry = 1\r\n'
22982309
v, w, x, y = ast.parse(s).body
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Improve performance of :func:`ast.get_source_segment`.

0 commit comments

Comments (0)
0