---
django/utils/text.py | 57 ++++++++++++++++++++++++++++++++--
docs/releases/3.2.25.txt | 8 +++++
tests/utils_tests/test_text.py | 26 ++++++++++++++++
3 files changed, 89 insertions(+), 2 deletions(-)
diff --git a/django/utils/text.py b/django/utils/text.py
index 83e258fa81c7..88da9a2c2c6b 100644
--- a/django/utils/text.py
+++ b/django/utils/text.py
@@ -18,8 +18,61 @@ def capfirst(x):
return x and str(x)[0].upper() + str(x)[1:]
-# Set up regular expressions
-re_words = _lazy_re_compile(r'<[^>]+?>|([^<>\s]+)', re.S)
+# ----- Begin security-related performance workaround -----
+
+ # This module used to define, at this point:
+ #
+ # re_words = _lazy_re_compile(r"<[^>]+?>|([^<>\s]+)", re.S)
+ #
+ # However, that regex, in the way it is used here, performs catastrophically
+ # in one edge case: when it is applied to text made up only of open brackets
+ # ("<<<..."). The WordsRegex class below provides the same service and the
+ # same answers for the supported use cases, but handles this edge case much
+ # faster.
+re_notag = _lazy_re_compile(r"([^<>\s]+)", re.S)
+re_prt = _lazy_re_compile(r"<|([^<>\s]+)", re.S)
+
+
+ class WordsRegex:
+     """
+     Stand-in for the former ``re_words`` compiled pattern.
+
+     Only ``search()`` is provided. It returns either a real ``re`` match
+     object or a ``FakeMatch``; both support ``m[0]``, ``m[1]`` and
+     ``m.end(0)``.
+     """
+
+     @staticmethod
+     def search(text, pos=0):
+         """
+         Return a match for the next "<...>" span or word in ``text`` at or
+         after index ``pos``, or ``None`` if there is neither.
+
+         Group 1 of the result is the word when a word was matched, and
+         ``None`` when a "<...>" span was matched. ``pos`` defaults to 0, as
+         it does for a compiled pattern's ``search()``.
+         """
+         # Look for "<" or a non-tag word, whichever comes first.
+         partial = re_prt.search(text, pos)
+         if partial is None or partial[1] is not None:
+             return partial
+
+         # "<" was found, look for a closing ">" after it.
+         after_bracket = partial.end(0)
+         close = text.find(">", after_bracket)
+         if close < 0:
+             # ">" cannot be found, so no "<...>" span can start here or
+             # later: look for a word instead. re_prt found nothing before
+             # this "<", so resuming just past it skips no candidate.
+             return re_notag.search(text, after_bracket)
+
+         # "<" followed by a ">" was found -- fake a match.
+         close += 1
+         return FakeMatch(text[partial.start(0):close], close)
+
+
+ class FakeMatch:
+     """
+     Minimal stand-in for an ``re`` match object covering a "<...>" span.
+
+     Supports only ``end(0)``, ``match[0]`` (the matched text) and
+     ``match[1]`` (always ``None``). Any other group raises ``IndexError``.
+     """
+
+     __slots__ = ["_text", "_end"]
+
+     def __init__(self, text, end):
+         # text: the matched substring; end: the index just past it in the
+         # searched string.
+         self._text, self._end = text, end
+
+     def end(self, group=0):
+         """Return the end index of the match; only group 0 is supported."""
+         # Raise instead of assert: asserts are stripped under ``python -O``,
+         # which would let an unsupported group silently return a value.
+         if group != 0:
+             raise IndexError("This specific object takes only group=0")
+         return self._end
+
+     def __getitem__(self, group):
+         """Return the matched text for group 0 and ``None`` for group 1."""
+         if group == 1:
+             return None
+         if group != 0:
+             raise IndexError(
+                 "This specific object takes only group in {0,1}"
+             )
+         return self._text
+
+
+# ----- End security-related performance workaround -----
+
+# Set up regular expressions.
+re_words = WordsRegex
re_chars = _lazy_re_compile(r'<[^>]+?>|(.)', re.S)
re_tag = _lazy_re_compile(r'<(/)?(\S+?)(?:(\s*/)|\s.*?)?>', re.S)
re_newlines = _lazy_re_compile(r'\r\n|\r') # Used in normalize_newlines
diff --git a/docs/releases/3.2.25.txt b/docs/releases/3.2.25.txt
index aa81c720d595..a3a90986ff27 100644
--- a/docs/releases/3.2.25.txt
+++ b/docs/releases/3.2.25.txt
@@ -7,6 +7,14 @@ Django 3.2.25 release notes
Django 3.2.25 fixes a security issue with severity "moderate" and a regression
in 3.2.24.
+CVE-2024-27351: Potential regular expression denial-of-service in ``django.utils.text.Truncator.words()``
+=========================================================================================================
+
+ The ``django.utils.text.Truncator.words()`` method (with ``html=True``) and
+ the :tfilter:`truncatewords_html` template filter were subject to a potential
+ regular expression denial-of-service attack using a suitably crafted string
+ (a follow-up to :cve:`2019-14232` and :cve:`2023-43665`).
+
Bugfixes
========
diff --git a/tests/utils_tests/test_text.py b/tests/utils_tests/test_text.py
index 0a6f0bc3f260..758919c66e81 100644
--- a/tests/utils_tests/test_text.py
+++ b/tests/utils_tests/test_text.py
@@ -159,6 +159,32 @@ def test_truncate_html_words(self):
truncator = text.Truncator('I <3 python, what about you?
')
self.assertEqual('I <3 python,…
', truncator.words(3, html=True))
+ # Only open brackets.
+ test = "<" * 60_000
+ truncator = text.Truncator(test)
+ self.assertEqual(truncator.words(1, html=True), test)
+
+ # Tags with special chars in attrs.
+ truncator = text.Truncator(
+ """Hello, my dear lady!"""
+ )
+ self.assertEqual(
+ """Hello, my dear…""",
+ truncator.words(3, html=True),
+ )
+
+ # Tags with special non-latin chars in attrs.
+ truncator = text.Truncator("""Hello, my dear lady!
""")
+ self.assertEqual(
+ """Hello, my dear…
""",
+ truncator.words(3, html=True),
+ )
+
+ # Misplaced brackets.
+ truncator = text.Truncator("hello >< world")
+ self.assertEqual(truncator.words(1, html=True), "hello…")
+ self.assertEqual(truncator.words(2, html=True), "hello >< world")
+
@patch("django.utils.text.Truncator.MAX_LENGTH_HTML", 10_000)
def test_truncate_words_html_size_limit(self):
max_len = text.Truncator.MAX_LENGTH_HTML
From c98eca322af87adf046ab621e7c8a23d340f7afe Mon Sep 17 00:00:00 2001
From: Mariusz Felisiak
Date: Mon, 4 Mar 2024 08:48:18 +0100
Subject: [PATCH 6/6] [3.2.x] Bumped version for 3.2.25 release.
---
django/__init__.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/django/__init__.py b/django/__init__.py
index 920354ca1f56..9207083317e0 100644
--- a/django/__init__.py
+++ b/django/__init__.py
@@ -1,6 +1,6 @@
from django.utils.version import get_version
-VERSION = (3, 2, 25, 'alpha', 0)
+VERSION = (3, 2, 25, 'final', 0)
__version__ = get_version(VERSION)