[5.1.x] Fixed CVE-2025-64460 -- Corrected quadratic inner text accumu… · django/django@0db9ea4 · GitHub
[go: up one dir, main page]

Skip to content

Commit 0db9ea4

Browse files
shaibnessita
authored and committed
[5.1.x] Fixed CVE-2025-64460 -- Corrected quadratic inner text accumulation in XML serializer.
Previously, `getInnerText()` recursively used `list.extend()` on strings, which added each character from child nodes as a separate list element. On deeply nested XML content, this caused the overall deserialization work to grow quadratically with input size, potentially allowing disproportionate CPU consumption for crafted XML. The fix separates collection of inner texts from joining them, so that each subtree is joined only once, reducing the complexity to linear in the size of the input. These changes also include a mitigation for an xml.dom.minidom performance issue. Thanks Seokchan Yoon (https://ch4n3.kr/) for the report. Co-authored-by: Jacob Walls <jacobtylerwalls@gmail.com> Co-authored-by: Natalia <124304+nessita@users.noreply.github.com> Backport of 50efb71 from main.
1 parent 9c6a5bd commit 0db9ea4

File tree

5 files changed

+109
-7
lines changed
Filter options

5 files changed

+109
-7
lines changed

django/core/serializers/xml_serializer.py

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
"""
44

55
import json
6-
from xml.dom import pulldom
6+
from contextlib import contextmanager
7+
from xml.dom import minidom, pulldom
78
from xml.sax import handler
89
from xml.sax.expatreader import ExpatParser as _ExpatParser
910

@@ -15,6 +16,25 @@
1516
from django.utils.xmlutils import SimplerXMLGenerator, UnserializableContentError
1617

1718

19+
@contextmanager
20+
def fast_cache_clearing():
21+
"""Workaround for performance issues in minidom document checks.
22+
23+
Speeds up repeated DOM operations by skipping unnecessary full traversal
24+
of the DOM tree.
25+
"""
26+
module_helper_was_lambda = False
27+
if original_fn := getattr(minidom, "_in_document", None):
28+
module_helper_was_lambda = original_fn.__name__ == "<lambda>"
29+
if not module_helper_was_lambda:
30+
minidom._in_document = lambda node: bool(node.ownerDocument)
31+
try:
32+
yield
33+
finally:
34+
if original_fn and not module_helper_was_lambda:
35+
minidom._in_document = original_fn
36+
37+
1838
class Serializer(base.Serializer):
1939
"""Serialize a QuerySet to XML."""
2040

@@ -209,7 +229,8 @@ def _make_parser(self):
209229
def __next__(self):
210230
for event, node in self.event_stream:
211231
if event == "START_ELEMENT" and node.nodeName == "object":
212-
self.event_stream.expandNode(node)
232+
with fast_cache_clearing():
233+
self.event_stream.expandNode(node)
213234
return self._handle_object(node)
214235
raise StopIteration
215236

@@ -393,19 +414,25 @@ def _get_model_from_node(self, node, attr):
393414

394415
def getInnerText(node):
395416
"""Get all the inner text of a DOM node (recursively)."""
417+
inner_text_list = getInnerTextList(node)
418+
return "".join(inner_text_list)
419+
420+
421+
def getInnerTextList(node):
422+
"""Return a list of the inner texts of a DOM node (recursively)."""
396423
# inspired by https://mail.python.org/pipermail/xml-sig/2005-March/011022.html
397-
inner_text = []
424+
result = []
398425
for child in node.childNodes:
399426
if (
400427
child.nodeType == child.TEXT_NODE
401428
or child.nodeType == child.CDATA_SECTION_NODE
402429
):
403-
inner_text.append(child.data)
430+
result.append(child.data)
404431
elif child.nodeType == child.ELEMENT_NODE:
405-
inner_text.extend(getInnerText(child))
432+
result.extend(getInnerTextList(child))
406433
else:
407434
pass
408-
return "".join(inner_text)
435+
return result
409436

410437

411438
# Below code based on Christian Heimes' defusedxml

docs/releases/4.2.27.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,16 @@ using a suitably crafted dictionary, with dictionary expansion, as the
1515
``**kwargs`` passed to :meth:`.QuerySet.annotate` or :meth:`.QuerySet.alias` on
1616
PostgreSQL.
1717

18+
CVE-2025-64460: Potential denial-of-service vulnerability in XML ``Deserializer``
19+
=================================================================================
20+
21+
:ref:`XML Serialization <serialization-formats-xml>` was subject to a potential
22+
denial-of-service attack due to quadratic time complexity when deserializing
23+
crafted documents containing many nested invalid elements. The internal helper
24+
``django.core.serializers.xml_serializer.getInnerText()`` previously
25+
accumulated inner text inefficiently during recursion. It now collects text per
26+
element, avoiding excessive resource usage.
27+
1828
Bugfixes
1929
========
2030

docs/releases/5.1.15.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,16 @@ using a suitably crafted dictionary, with dictionary expansion, as the
1515
``**kwargs`` passed to :meth:`.QuerySet.annotate` or :meth:`.QuerySet.alias` on
1616
PostgreSQL.
1717

18+
CVE-2025-64460: Potential denial-of-service vulnerability in XML ``Deserializer``
19+
=================================================================================
20+
21+
:ref:`XML Serialization <serialization-formats-xml>` was subject to a potential
22+
denial-of-service attack due to quadratic time complexity when deserializing
23+
crafted documents containing many nested invalid elements. The internal helper
24+
``django.core.serializers.xml_serializer.getInnerText()`` previously
25+
accumulated inner text inefficiently during recursion. It now collects text per
26+
element, avoiding excessive resource usage.
27+
1828
Bugfixes
1929
========
2030

docs/topics/serialization.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,8 @@ Identifier Information
173173
.. _jsonl: https://jsonlines.org/
174174
.. _PyYAML: https://pyyaml.org/
175175

176+
.. _serialization-formats-xml:
177+
176178
XML
177179
---
178180

tests/serializers/test_xml.py

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
1+
import gc
2+
import time
13
from xml.dom import minidom
24

35
from django.core import serializers
4-
from django.core.serializers.xml_serializer import DTDForbidden
6+
from django.core.serializers.xml_serializer import Deserializer, DTDForbidden
7+
from django.db import models
58
from django.test import TestCase, TransactionTestCase
69

710
from .tests import SerializersTestBase, SerializersTransactionTestBase
@@ -90,6 +93,56 @@ def test_no_dtd(self):
9093
with self.assertRaises(DTDForbidden):
9194
next(serializers.deserialize("xml", xml))
9295

96+
def test_crafted_xml_performance(self):
97+
"""The time to process invalid inputs is not quadratic."""
98+
99+
def build_crafted_xml(depth, leaf_text_len):
100+
nested_open = "<nested>" * depth
101+
nested_close = "</nested>" * depth
102+
leaf = "x" * leaf_text_len
103+
field_content = f"{nested_open}{leaf}{nested_close}"
104+
return f"""
105+
<django-objects version="1.0">
106+
<object model="contenttypes.contenttype" pk="1">
107+
<field name="app_label">{field_content}</field>
108+
<field name="model">m</field>
109+
</object>
110+
</django-objects>
111+
"""
112+
113+
def deserialize(crafted_xml):
114+
iterator = Deserializer(crafted_xml)
115+
gc.collect()
116+
117+
start_time = time.perf_counter()
118+
result = list(iterator)
119+
end_time = time.perf_counter()
120+
121+
self.assertEqual(len(result), 1)
122+
self.assertIsInstance(result[0].object, models.Model)
123+
return end_time - start_time
124+
125+
def assertFactor(label, params, factor=2):
126+
factors = []
127+
prev_time = None
128+
for depth, length in params:
129+
crafted_xml = build_crafted_xml(depth, length)
130+
elapsed = deserialize(crafted_xml)
131+
if prev_time is not None:
132+
factors.append(elapsed / prev_time)
133+
prev_time = elapsed
134+
135+
with self.subTest(label):
136+
# Assert based on the average factor to reduce test flakiness.
137+
self.assertLessEqual(sum(factors) / len(factors), factor)
138+
139+
assertFactor(
140+
"varying depth, varying length",
141+
[(50, 2000), (100, 4000), (200, 8000), (400, 16000), (800, 32000)],
142+
2,
143+
)
144+
assertFactor("constant depth, varying length", [(100, 1), (100, 1000)], 2)
145+
93146

94147
class XmlSerializerTransactionTestCase(
95148
SerializersTransactionTestBase, TransactionTestCase

0 commit comments

Comments
 (0)
0