gh-130167: Optimise ``textwrap.dedent()`` (#131919) · python/cpython@6aa88a2 · GitHub
[go: up one dir, main page]

Skip to content
8000

Commit 6aa88a2

Browse files
AA-Turner, Marius-Juston, eendebakpt, picnixz
authored
gh-130167: Optimise textwrap.dedent() (#131919)
Co-authored-by: Marius Juston <marius.juston@hotmail.fr>
Co-authored-by: Pieter Eendebak <pieter.eendebak@gmail.com>
Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
1 parent 685fd74 commit 6aa88a2

File tree

3 files changed

+68
-37
lines changed

3 files changed

+68
-37
lines changed

Lib/test/test_textwrap.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -769,6 +769,56 @@ def assertUnchanged(self, text):
769769
"""assert that dedent() has no effect on 'text'"""
770770
self.assertEqual(text, dedent(text))
771771

772+
def test_dedent_whitespace(self):
773+
# The empty string.
774+
text = ""
775+
self.assertUnchanged(text)
776+
777+
# Only spaces.
778+
text = " "
779+
expect = ""
780+
self.assertEqual(expect, dedent(text))
781+
782+
# Only tabs.
783+
text = "\t\t\t\t"
784+
expect = ""
785+
self.assertEqual(expect, dedent(text))
786+
787+
# A mixture.
788+
text = " \t \t\t \t "
789+
expect = ""
790+
self.assertEqual(expect, dedent(text))
791+
792+
# ASCII whitespace.
793+
text = "\f\n\r\t\v "
794+
expect = "\n"
795+
self.assertEqual(expect, dedent(text))
796+
797+
# One newline.
798+
text = "\n"
799+
expect = "\n"
800+
self.assertEqual(expect, dedent(text))
801+
802+
# Windows-style newlines.
803+
text = "\r\n" * 5
804+
expect = "\n" * 5
805+
self.assertEqual(expect, dedent(text))
806+
807+
# Whitespace mixture.
808+
text = " \n\t\n \n\t\t\n\n\n "
809+
expect = "\n\n\n\n\n\n"
810+
self.assertEqual(expect, dedent(text))
811+
812+
# Lines consisting only of whitespace are always normalised
813+
text = "a\n \n\t\n"
814+
expect = "a\n\n\n"
815+
self.assertEqual(expect, dedent(text))
816+
817+
# Whitespace characters on non-empty lines are retained
818+
text = "a\r\n\r\n\r\n"
819+
expect = "a\r\n\n\n"
820+
self.assertEqual(expect, dedent(text))
821+
772822
def test_dedent_nomargin(self):
773823
# No lines indented.
774824
text = "Hello there.\nHow are you?\nOh good, I'm glad."

Lib/textwrap.py

Lines changed: 13 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -413,9 +413,6 @@ def shorten(text, width, **kwargs):
413413

414414
# -- Loosely related functionality -------------------------------------
415415

416-
_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
417-
_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)
418-
419416
def dedent(text):
420417
"""Remove any common leading whitespace from every line in `text`.
421418
@@ -429,42 +426,21 @@ def dedent(text):
429426
430427
Entirely blank lines are normalized to a newline character.
431428
"""
432-
# Look for the longest leading string of spaces and tabs common to
433-
# all lines.
434-
margin = None
435-
text = _whitespace_only_re.sub('', text)
436-
indents = _leading_whitespace_re.findall(text)
437-
for indent in indents:
438-
if margin is None:
439-
margin = indent
440-
441-
# Current line more deeply indented than previous winner:
442-
# no change (previous winner is still on top).
443-
elif indent.startswith(margin):
444-
pass
445-
446-
# Current line consistent with and no deeper than previous winner:
447-
# it's the new winner.
448-
elif margin.startswith(indent):
449-
margin = indent
450-
451-
# Find the largest common whitespace between current line and previous
452-
# winner.
453-
else:
454-
for i, (x, y) in enumerate(zip(margin, indent)):
455-
if x != y:
456-
margin = margin[:i]
457-
break
429+
if not text:
430+
return text
431+
432+
lines = text.split('\n')
458433

459-
# sanity check (testing/debugging only)
460-
if 0 and margin:
461-
for line in text.split("\n"):
462-
assert not line or line.startswith(margin), \
463-
"line = %r, margin = %r" % (line, margin)
434+
# Get length of leading whitespace, inspired by ``os.path.commonprefix()``.
435+
non_blank_lines = [l for l in lines if l and not l.isspace()]
436+
l1 = min(non_blank_lines, default='')
437+
l2 = max(non_blank_lines, default='')
438+
margin = 0
439+
for margin, c in enumerate(l1):
440+
if c != l2[margin] or c not in ' \t':
441+
break
464442

465-
if margin:
466-
text = re.sub(r'(?m)^' + margin, '', text)
467-
return text
443+
return '\n'.join([l[margin:] if not l.isspace() else '' for l in lines])
468444

469445

470446
def indent(text, prefix, predicate=None):
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Improved performance of :func:`textwrap.dedent` by an average of ~2.4x
2+
(with improvements of up to 4x for large inputs),
3+
and fixed a bug where blank lines with whitespace characters other than space
4+
or horizontal tab were not normalised to the newline.
5+
Patch by Adam Turner, Marius Juston, and Pieter Eendebak.

0 commit comments

Comments
 (0)
0