Fixed whitespace-stripping filter and added some unit tests (to be completed a bit) · html5lib/html5lib-python@c851900 · GitHub
[go: up one dir, main page]

Skip to content

Commit c851900

Browse files
committed
Fixed whitespace-stripping filter and added some unit tests (to be completed a bit)
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40912
1 parent edaaab3 commit c851900

File tree

2 files changed

+106
-2
lines changed

2 files changed

+106
-2
lines changed

src/html5lib/filters/whitespace.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,9 @@ def __iter__(self):
2525
elif type == "EndTag" and preserve:
2626
preserve -= 1
2727

28-
elif not preserve and type == "SpaceCharacters":
29-
continue
28+
elif not preserve and type == "SpaceCharacters" and token["data"]:
29+
# The token["data"] check above ensures we do not introduce a space where there was none
30+
token["data"] = u" "
3031

3132
elif not preserve and type == "Characters":
3233
token["data"] = collapse_spaces(token["data"])

tests/test_whitespace_filter.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
import unittest
2+
3+
from html5lib.filters.whitespace import Filter
4+
from html5lib.constants import spaceCharacters
5+
spaceCharacters = u"".join(spaceCharacters)
6+
7+
class TestCase(unittest.TestCase):
    """Unit tests for the whitespace filter (``html5lib.filters.whitespace``).

    Each test feeds a hand-built token stream through ``Filter`` and compares
    the resulting tokens with an expected stream.  Outside ``<pre>`` runs of
    whitespace are expected to collapse to a single space; inside ``<pre>``
    the stream is expected to come back unchanged.
    """

    def runTest(self, input, expected):
        """Filter *input* and assert that the output equals *expected*.

        input    -- list of token dicts to pass through ``Filter``
        expected -- list of token dicts the filter should produce

        The filter is given a deep copy of *input*: it assigns to
        ``token["data"]`` in place, so filtering the caller's list directly
        would corrupt the "Input:" part of the failure message and -- when
        the caller passes the same list as *expected* -- could make the
        comparison succeed vacuously.
        """
        # Function-scope import so this block does not depend on the
        # module-level import list.
        import copy

        output = list(Filter(copy.deepcopy(input)))
        errorMsg = "\n".join(["\n\nInput:", str(input),
                              "\nExpected:", str(expected),
                              "\nReceived:", str(output)])
        # assertEqual, not the deprecated assertEquals alias.
        self.assertEqual(output, expected, errorMsg)

    def runTestUnmodifiedOutput(self, input):
        """Assert that the filter passes *input* through unchanged."""
        # Safe to reuse *input* as the expectation because runTest never
        # hands the original token dicts to the filter.
        self.runTest(input, input)

    def testLeadingWhitespace(self):
        """A leading SpaceCharacters token collapses to a single space."""
        self.runTest(
            [{"type": u"StartTag", "name": u"p", "data": []},
             {"type": u"SpaceCharacters", "data": spaceCharacters},
             {"type": u"Characters", "data": u"foo"},
             {"type": u"EndTag", "name": u"p", "data": []}],
            [{"type": u"StartTag", "name": u"p", "data": []},
             {"type": u"SpaceCharacters", "data": u" "},
             {"type": u"Characters", "data": u"foo"},
             {"type": u"EndTag", "name": u"p", "data": []}])

    def testLeadingWhitespaceAsCharacters(self):
        """Leading whitespace inside a Characters token collapses."""
        self.runTest(
            [{"type": u"StartTag", "name": u"p", "data": []},
             {"type": u"Characters", "data": spaceCharacters + u"foo"},
             {"type": u"EndTag", "name": u"p", "data": []}],
            [{"type": u"StartTag", "name": u"p", "data": []},
             {"type": u"Characters", "data": u" foo"},
             {"type": u"EndTag", "name": u"p", "data": []}])

    def testTrailingWhitespace(self):
        """A trailing SpaceCharacters token collapses to a single space."""
        self.runTest(
            [{"type": u"StartTag", "name": u"p", "data": []},
             {"type": u"Characters", "data": u"foo"},
             {"type": u"SpaceCharacters", "data": spaceCharacters},
             {"type": u"EndTag", "name": u"p", "data": []}],
            [{"type": u"StartTag", "name": u"p", "data": []},
             {"type": u"Characters", "data": u"foo"},
             {"type": u"SpaceCharacters", "data": u" "},
             {"type": u"EndTag", "name": u"p", "data": []}])

    def testTrailingWhitespaceAsCharacters(self):
        """Trailing whitespace inside a Characters token collapses."""
        self.runTest(
            [{"type": u"StartTag", "name": u"p", "data": []},
             {"type": u"Characters", "data": u"foo" + spaceCharacters},
             {"type": u"EndTag", "name": u"p", "data": []}],
            [{"type": u"StartTag", "name": u"p", "data": []},
             {"type": u"Characters", "data": u"foo "},
             {"type": u"EndTag", "name": u"p", "data": []}])

    def testWhitespace(self):
        """Whitespace between words in a Characters token collapses."""
        self.runTest(
            [{"type": u"StartTag", "name": u"p", "data": []},
             {"type": u"Characters",
              "data": u"foo" + spaceCharacters + u"bar"},
             {"type": u"EndTag", "name": u"p", "data": []}],
            [{"type": u"StartTag", "name": u"p", "data": []},
             {"type": u"Characters", "data": u"foo bar"},
             {"type": u"EndTag", "name": u"p", "data": []}])

    def testLeadingWhitespaceInPre(self):
        """A leading SpaceCharacters token inside <pre> is left alone."""
        self.runTestUnmodifiedOutput(
            [{"type": u"StartTag", "name": u"pre", "data": []},
             {"type": u"SpaceCharacters", "data": spaceCharacters},
             {"type": u"Characters", "data": u"foo"},
             {"type": u"EndTag", "name": u"pre", "data": []}])

    def testLeadingWhitespaceAsCharactersInPre(self):
        """Leading whitespace in a Characters token inside <pre> is kept."""
        self.runTestUnmodifiedOutput(
            [{"type": u"StartTag", "name": u"pre", "data": []},
             {"type": u"Characters", "data": spaceCharacters + u"foo"},
             {"type": u"EndTag", "name": u"pre", "data": []}])

    def testTrailingWhitespaceInPre(self):
        """A trailing SpaceCharacters token inside <pre> is left alone."""
        self.runTestUnmodifiedOutput(
            [{"type": u"StartTag", "name": u"pre", "data": []},
             {"type": u"Characters", "data": u"foo"},
             {"type": u"SpaceCharacters", "data": spaceCharacters},
             {"type": u"EndTag", "name": u"pre", "data": []}])

    def testTrailingWhitespaceAsCharactersInPre(self):
        """Trailing whitespace in a Characters token inside <pre> is kept."""
        self.runTestUnmodifiedOutput(
            [{"type": u"StartTag", "name": u"pre", "data": []},
             {"type": u"Characters", "data": u"foo" + spaceCharacters},
             {"type": u"EndTag", "name": u"pre", "data": []}])

    def testWhitespaceInPre(self):
        """Whitespace between words inside <pre> is kept."""
        self.runTestUnmodifiedOutput(
            [{"type": u"StartTag", "name": u"pre", "data": []},
             {"type": u"Characters",
              "data": u"foo" + spaceCharacters + u"bar"},
             {"type": u"EndTag", "name": u"pre", "data": []}])
98+
99+
def main():
    """Run every test in this module through the unittest command-line runner."""
    unittest.main()


# Only run the suite when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)
0