Fix attribute order to the treebuilder to be document order · jpic/html5lib-python@e0dc25f · GitHub
[go: up one dir, main page]

Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit e0dc25f

Browse files
committed
Fix attribute order to the treebuilder to be document order
Somehow I managed to screw this up so it became reverse document order!
1 parent a3b8252 commit e0dc25f

File tree

3 files changed

+39
-5
lines changed

3 files changed

+39
-5
lines changed

CHANGES.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@ Change Log
44
0.999999999/1.0b10
55
~~~~~~~~~~~~~~~~~~
66

7-
Released on XXX
7+
Released on July 15, 2016
88

9-
* XXX
9+
* Fix attribute order going to the tree builder to be document order
10+
instead of reverse document order(!).
1011

1112

1213
0.99999999/1.0b9

html5lib/html5parser.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,11 @@ def normalizeToken(self, token):
265265
""" HTML5 specific normalizations to the token stream """
266266

267267
if token["type"] == tokenTypes["StartTag"]:
268-
token["data"] = OrderedDict(token['data'][::-1])
268+
raw = token["data"]
269+
token["data"] = OrderedDict(raw)
270+
if len(raw) > len(token["data"]):
271+
# at least one attribute was duplicated: re-apply the pairs in reverse so the first occurrence's value wins (key order is unchanged)
272+
token["data"].update(raw[::-1])
269273

270274
return token
271275

html5lib/tests/test_parser2.py

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
from __future__ import absolute_import, division, unicode_literals
22

3-
from six import PY2, text_type
3+
from six import PY2, text_type, unichr
44

55
import io
66

77
from . import support # noqa
88

9-
from html5lib.constants import namespaces
9+
from html5lib.constants import namespaces, tokenTypes
1010
from html5lib import parse, parseFragment, HTMLParser
1111

1212

@@ -53,13 +53,42 @@ def test_unicode_file():
5353
assert parse(io.StringIO("a")) is not None
5454

5555

56+
def test_maintain_attribute_order():
57+
# This is tested here because the normalization is implemented in the parser, not the tokenizer
58+
p = HTMLParser()
59+
# generate many attributes to maximize the chance that any hash-based reordering would show up
60+
attrs = [(unichr(x), i) for i, x in enumerate(range(ord('a'), ord('z')))]
61+
token = {'name': 'html',
62+
'selfClosing': False,
63+
'selfClosingAcknowledged': False,
64+
'type': tokenTypes["StartTag"],
65+
'data': attrs}
66+
out = p.normalizeToken(token)
67+
attr_order = list(out["data"].keys())
68+
assert attr_order == [x for x, i in attrs]
69+
70+
5671
def test_duplicate_attribute():
5772
# This is here because we impl it in parser and not tokenizer
5873
doc = parse('<p class=a class=b>')
5974
el = doc[1][0]
6075
assert el.get("class") == "a"
6176

6277

78+
def test_maintain_duplicate_attribute_order():
79+
# This is here because we impl it in parser and not tokenizer
80+
p = HTMLParser()
81+
attrs = [(unichr(x), i) for i, x in enumerate(range(ord('a'), ord('z')))]
82+
token = {'name': 'html',
83+
'selfClosing': False,
84+
'selfClosingAcknowledged': False,
85+
'type': tokenTypes["StartTag"],
86+
'data': attrs + [('a', len(attrs))]}
87+
out = p.normalizeToken(token)
88+
attr_order = list(out["data"].keys())
89+
assert attr_order == [x for x, i in attrs]
90+
91+
6392
def test_debug_log():
6493
parser = HTMLParser(debug=True)
6594
parser.parse("<!doctype html><title>a</title><p>b<script>c</script>d</p>e")

0 commit comments

Comments
 (0)
0