[3.8] bpo-37399: Correctly attach tail text to the last element/comme… · python/cpython@bb69789 · GitHub
[go: up one dir, main page]

Skip to content

Commit bb69789

Browse files
authored
[3.8] bpo-37399: Correctly attach tail text to the last element/comment/pi (GH-14856) (GH-14936)
* bpo-37399: Correctly attach tail text to the last element/comment/pi, even when comments or pis are discarded. Also fixes the insertion of PIs when "insert_pis=True" is configured for a TreeBuilder.
1 parent 6367391 commit bb69789

File tree

2 files changed

+124
-24
lines changed

2 files changed

+124
-24
lines changed

Lib/test/test_xml_etree.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2954,6 +2954,66 @@ def test_treebuilder_pi(self):
29542954
self.assertEqual(b.pi('target'), (len('target'), None))
29552955
self.assertEqual(b.pi('pitarget', ' text '), (len('pitarget'), ' text '))
29562956

2957+
def test_late_tail(self):
2958+
# Issue #37399: The tail of an ignored comment could overwrite the text before it.
2959+
class TreeBuilderSubclass(ET.TreeBuilder):
2960+
pass
2961+
2962+
xml = "<a>text<!-- comment -->tail</a>"
2963+
a = ET.fromstring(xml)
2964+
self.assertEqual(a.text, "texttail")
2965+
2966+
parser = ET.XMLParser(target=TreeBuilderSubclass())
2967+
parser.feed(xml)
2968+
a = parser.close()
2969+
self.assertEqual(a.text, "texttail")
2970+
2971+
xml = "<a>text<?pi data?>tail</a>"
2972+
a = ET.fromstring(xml)
2973+
self.assertEqual(a.text, "texttail")
2974+
2975+
xml = "<a>text<?pi data?>tail</a>"
2976+
parser = ET.XMLParser(target=TreeBuilderSubclass())
2977+
parser.feed(xml)
2978+
a = parser.close()
2979+
self.assertEqual(a.text, "texttail")
2980+
2981+
def test_late_tail_mix_pi_comments(self):
2982+
# Issue #37399: The tail of an ignored comment could overwrite the text before it.
2983+
# Test appending tails to comments/pis.
2984+
class TreeBuilderSubclass(ET.TreeBuilder):
2985+
pass
2986+
2987+
xml = "<a>text<?pi1?> <!-- comment -->\n<?pi2?>tail</a>"
2988+
parser = ET.XMLParser(target=ET.TreeBuilder(insert_comments=True))
2989+
parser.feed(xml)
2990+
a = parser.close()
2991+
self.assertEqual(a[0].text, ' comment ')
2992+
self.assertEqual(a[0].tail, '\ntail')
2993+
self.assertEqual(a.text, "text ")
2994+
2995+
parser = ET.XMLParser(target=TreeBuilderSubclass(insert_comments=True))
2996+
parser.feed(xml)
2997+
a = parser.close()
2998+
self.assertEqual(a[0].text, ' comment ')
2999+
self.assertEqual(a[0].tail, '\ntail')
3000+
self.assertEqual(a.text, "text ")
3001+
3002+
xml = "<a>text<!-- comment -->\n<?pi data?>tail</a>"
3003+
parser = ET.XMLParser(target=ET.TreeBuilder(insert_pis=True))
3004+
parser.feed(xml)
3005+
a = parser.close()
3006+
self.assertEqual(a[0].text, 'pi data')
3007+
self.assertEqual(a[0].tail, 'tail')
3008+
self.assertEqual(a.text, "text\n")
3009+
3010+
parser = ET.XMLParser(target=TreeBuilderSubclass(insert_pis=True))
3011+
parser.feed(xml)
3012+
a = parser.close()
3013+
self.assertEqual(a[0].text, 'pi data')
3014+
self.assertEqual(a[0].tail, 'tail')
3015+
self.assertEqual(a.text, "text\n")
3016+
29573017
def test_treebuilder_elementfactory_none(self):
29583018
parser = ET.XMLParser(target=ET.TreeBuilder(element_factory=None))
29593019
parser.feed(self.sample1)

Modules/_elementtree.c

Lines changed: 64 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2399,6 +2399,7 @@ typedef struct {
23992399

24002400
PyObject *this; /* current node */
24012401
PyObject *last; /* most recently created node */
2402+
PyObject *last_for_tail; /* most recently created node that takes a tail */
24022403

24032404
PyObject *data; /* data collector (string or list), or NULL */
24042405

@@ -2530,6 +2531,7 @@ treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
25302531
Py_VISIT(self->root);
25312532
Py_VISIT(self->this);
25322533
Py_VISIT(self->last);
2534+
Py_VISIT(self->last_for_tail);
25332535
Py_VISIT(self->data);
25342536
Py_VISIT(self->stack);
25352537
Py_VISIT(self->pi_factory);
@@ -2551,6 +2553,7 @@ treebuilder_gc_clear(TreeBuilderObject *self)
25512553
Py_CLEAR(self->stack);
25522554
Py_CLEAR(self->data);
25532555
Py_CLEAR(self->last);
2556+
Py_CLEAR(self->last_for_tail);
25542557
Py_CLEAR(self->this);
25552558
Py_CLEAR(self->pi_factory);
25562559
Py_CLEAR(self->comment_factory);
@@ -2622,21 +2625,50 @@ _elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory,
26222625
}
26232626

26242627
static int
2625-
treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
2626-
PyObject **dest, _Py_Identifier *name)
2628+
treebuilder_extend_element_text_or_tail(PyObject *element, PyObject **data,
2629+
PyObject **dest, _Py_Identifier *name)
26272630
{
2631+
/* Fast paths for the "almost always" cases. */
26282632
if (Element_CheckExact(element)) {
2629-
PyObject *tmp = JOIN_OBJ(*dest);
2630-
*dest = JOIN_SET(*data, PyList_CheckExact(*data));
2631-
*data = NULL;
2632-
Py_DECREF(tmp);
2633-
return 0;
2633+
PyObject *dest_obj = JOIN_OBJ(*dest);
2634+
if (dest_obj == Py_None) {
2635+
*dest = JOIN_SET(*data, PyList_CheckExact(*data));
2636+
*data = NULL;
2637+
Py_DECREF(dest_obj);
2638+
return 0;
2639+
}
2640+
else if (JOIN_GET(*dest)) {
2641+
if (PyList_SetSlice(dest_obj, PY_SSIZE_T_MAX, PY_SSIZE_T_MAX, *data) < 0) {
2642+
return -1;
2643+
}
2644+
Py_CLEAR(*data);
2645+
return 0;
2646+
}
26342647
}
2635-
else {
2636-
PyObject *joined = list_join(*data);
2648+
2649+
/* Fallback for the non-Element / non-trivial cases. */
2650+
{
26372651
int r;
2638-
if (joined == NULL)
2652+
PyObject* joined;
2653+
PyObject* previous = _PyObject_GetAttrId(element, name);
2654+
if (!previous)
2655+
return -1;
2656+
joined = list_join(*data);
2657+
if (!joined) {
2658+
Py_DECREF(previous);
26392659
return -1;
2660+
}
2661+
if (previous != Py_None) {
2662+
PyObject *tmp = PyNumber_Add(previous, joined);
2663+
Py_DECREF(joined);
2664+
Py_DECREF(previous);
2665+
if (!tmp)
2666+
return -1;
2667+
joined = tmp;
2668+
} else {
2669+
Py_DECREF(previous);
2670+
}
2671+
26402672
r = _PyObject_SetAttrId(element, name, joined);
26412673
Py_DECREF(joined);
26422674
if (r < 0)
@@ -2649,21 +2681,21 @@ treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
26492681
LOCAL(int)
26502682
treebuilder_flush_data(TreeBuilderObject* self)
26512683
{
2652-
PyObject *element = self->last;
2653-
26542684
if (!self->data) {
26552685
return 0;
26562686
}
26572687

2658-
if (self->this == element) {
2688+
if (!self->last_for_tail) {
2689+
PyObject *element = self->last;
26592690
_Py_IDENTIFIER(text);
2660-
return treebuilder_set_element_text_or_tail(
2691+
return treebuilder_extend_element_text_or_tail(
26612692
element, &self->data,
26622693
&((ElementObject *) element)->text, &PyId_text);
26632694
}
26642695
else {
2696+
PyObject *element = self->last_for_tail;
26652697
_Py_IDENTIFIER(tail);
2666-
return treebuilder_set_element_text_or_tail(
2698+
return treebuilder_extend_element_text_or_tail(
26672699
element, &self->data,
26682700
&((ElementObject *) element)->tail, &PyId_tail);
26692701
}
@@ -2739,6 +2771,7 @@ treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
27392771
}
27402772

27412773
this = self->this;
2774+
Py_CLEAR(self->last_for_tail);
27422775

27432776
if (this != Py_None) {
27442777
if (treebuilder_add_subelement(this, node) < 0)
@@ -2836,6 +2869,8 @@ treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
28362869

28372870
item = self->last;
28382871
self->last = self->this;
2872+
Py_INCREF(self->last);
2873+
Py_XSETREF(self->last_for_tail, self->last);
28392874
self->index--;
28402875
self->this = PyList_GET_ITEM(self->stack, self->index);
28412876
Py_INCREF(self->this);
@@ -2851,7 +2886,7 @@ treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
28512886
LOCAL(PyObject*)
28522887
treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
28532888
{
2854-
PyObject* comment = NULL;
2889+
PyObject* comment;
28552890
PyObject* this;
28562891

28572892
if (treebuilder_flush_data(self) < 0) {
@@ -2867,6 +2902,8 @@ treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
28672902
if (self->insert_comments && this != Py_None) {
28682903
if (treebuilder_add_subelement(this, comment) < 0)
28692904
goto error;
2905+
Py_INCREF(comment);
2906+
Py_XSETREF(self->last_for_tail, comment);
28702907
}
28712908
} else {
28722909
Py_INCREF(text);
@@ -2888,7 +2925,7 @@ treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
28882925
LOCAL(PyObject*)
28892926
treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text)
28902927
{
2891-
PyObject* pi = NULL;
2928+
PyObject* pi;
28922929
PyObject* this;
28932930
PyObject* stack[2] = {target, text};
28942931

@@ -2906,6 +2943,8 @@ treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text)
29062943
if (self->insert_pis && this != Py_None) {
29072944
if (treebuilder_add_subelement(this, pi) < 0)
29082945
goto error;
2946+
Py_INCREF(pi);
2947+
Py_XSETREF(self->last_for_tail, pi);
29092948
}
29102949
} else {
29112950
pi = PyTuple_Pack(2, target, text);
@@ -3495,8 +3534,8 @@ expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
34953534
static void
34963535
expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
34973536
{
3498-
PyObject* comment = NULL;
3499-
PyObject* res = NULL;
3537+
PyObject* comment;
3538+
PyObject* res;
35003539

35013540
if (PyErr_Occurred())
35023541
return;
@@ -3510,16 +3549,17 @@ expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
35103549
return; /* parser will look for errors */
35113550

35123551
res = treebuilder_handle_comment(target, comment);
3552+
Py_XDECREF(res);
3553+
Py_DECREF(comment);
35133554
} else if (self->handle_comment) {
35143555
comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
35153556
if (!comment)
35163557
return;
35173558

35183559
res = _PyObject_FastCall(self->handle_comment, &comment, 1);
3560+
Py_XDECREF(res);
3561+
Py_DECREF(comment);
35193562
}
3520-
3521-
Py_XDECREF(res);
3522-
Py_DECREF(comment);
35233563
}
35243564

35253565
static void
@@ -3587,7 +3627,7 @@ static void
35873627
expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
35883628
const XML_Char* data_in)
35893629
{
3590-
PyObject* pi_target = NULL;
3630+
PyObject* pi_target;
35913631
PyObject* data;
35923632
PyObject* res;
35933633
PyObject* stack[2];
@@ -3599,7 +3639,7 @@ expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
35993639
/* shortcut */
36003640
TreeBuilderObject *target = (TreeBuilderObject*) self->target;
36013641

3602-
if (target->events_append && target->pi_event_obj) {
3642+
if ((target->events_append && target->pi_event_obj) || target->insert_pis) {
36033643
pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
36043644
if (!pi_target)
36053645
goto error;

0 commit comments

Comments
 (0)
0