|
1 |
| -import os,sys,unittest |
| 1 | +import os,sys,unittest,glob |
| 2 | +from support import simplejson |
2 | 3 |
|
3 | 4 | #RELEASE remove
|
4 |
| -if __name__ == '__main__': |
5 |
| - # XXX Allow us to import the sibling module |
6 |
| - os.chdir(os.path.split(os.path.abspath(__file__))[0]) |
7 |
| - sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, "src"))) |
8 |
| - |
9 | 5 | import html5parser, sanitizer, constants
|
10 | 6 | #END RELEASE
|
11 | 7 |
|
|
15 | 11 |
|
16 | 12 | class SanitizeTest(unittest.TestCase):
|
17 | 13 | def addTest(cls, name, expected, input):
|
18 |
| - setattr(cls, name, |
19 |
| - lambda self: self.assertEqual(expected, self.sanitize_html(input))) |
| 14 | + def test(self, expected=expected, input=input): |
| 15 | + expected = ''.join([token.toxml() for token in html5parser.HTMLParser(). |
| 16 | + parseFragment(expected.encode('utf-8')).childNodes]) |
| 17 | + self.assertEqual(expected, self.sanitize_html(input)) |
| 18 | + setattr(cls, name, test) |
20 | 19 | addTest = classmethod(addTest)
|
21 | 20 |
|
22 | 21 | def sanitize_html(self,stream):
|
23 | 22 | return ''.join([token.toxml() for token in
|
24 | 23 | html5parser.HTMLParser(tokenizer=sanitizer.HTMLSanitizer).
|
25 | 24 | parseFragment(stream).childNodes])
|
26 | 25 |
|
27 |
| - def test_should_allow_anchors(self): |
28 |
| - self.assertEqual("<a href=\"foo\"><script>baz</script></a>", |
29 |
| - self.sanitize_html("<a href='foo' onclick='bar'><script>baz</script></a>")) |
30 |
| - |
31 |
| - # RFC 3986, sec 4.2 |
32 |
| - def test_allow_colons_in_path_component(self): |
33 |
| - self.assertEqual("<a href=\"./this:that\">foo</a>", |
34 |
| - self.sanitize_html("<a href=\"./this:that\">foo</a>")) |
35 |
| - |
36 |
| - def test_should_handle_non_html(self): |
37 |
| - self.assertEqual('abc', self.sanitize_html("abc")) |
38 |
| - |
39 |
| - def test_should_handle_blank_text(self): |
40 |
| - self.assertEqual('', self.sanitize_html('')) |
41 |
| - |
42 |
| - def test_should_sanitize_tag_broken_up_by_null(self): |
43 |
| - self.assertEqual(u"<scr\ufffdipt>alert(\"XSS\")</scr\ufffdipt>", self.sanitize_html("""<scr\0ipt>alert(\"XSS\")</scr\0ipt>""")) |
44 |
| - |
45 |
| - def test_should_sanitize_invalid_script_tag(self): |
46 |
| - self.assertEqual("<script XSS=\"\" SRC=\"http://ha.ckers.org/xss.js\"></script>", self.sanitize_html("""<script/XSS SRC="http://ha.ckers.org/xss.js"></script>""")) |
47 |
| - |
48 |
| - def test_should_sanitize_script_tag_with_multiple_open_brackets(self): |
49 |
| - self.assertEqual("<<script>alert(\"XSS\");//<</script>", self.sanitize_html("""<<script>alert("XSS");//<</script>""")) |
50 |
| - self.assertEqual("""<iframe src=\"http://ha.ckers.org/scriptlet.html\"><""", self.sanitize_html("""<iframe src=http://ha.ckers.org/scriptlet.html\n<""")) |
51 |
| - |
52 |
| - def test_should_sanitize_unclosed_script(self): |
53 |
| - self.assertEqual("<script src=\"http://ha.ckers.org/xss.js?\"><b/>", self.sanitize_html("""<script src=http://ha.ckers.org/xss.js?<b>""")) |
54 |
| - |
55 |
| - def test_should_sanitize_half_open_scripts(self): |
56 |
| - self.assertEqual("<img/>", self.sanitize_html("""<img src="javascript:alert('XSS')""")) |
57 |
| - |
58 |
| - def test_should_not_fall_for_ridiculous_hack(self): |
59 |
| - img_hack = """<img\nsrc\n=\n"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n"\n />""" |
60 |
| - self.assertEqual("<img/>", self.sanitize_html(img_hack)) |
61 |
| - |
62 |
| - def test_platypus(self): |
63 |
| - self.assertEqual("""<a style=\"display: block; width: 100%; height: 100%; background-color: black; background-x: center; background-y: center;\" href=\"http://www.ragingplatypus.com/\">never trust your upstream platypus</a>""", |
64 |
| - self.sanitize_html("""<a href="http://www.ragingplatypus.com/" style="display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;">never trust your upstream platypus</a>""")) |
65 |
| - |
66 |
| - def test_xul(self): |
67 |
| - self.assertEqual("""<p style="">fubar</p>""", |
68 |
| - self.sanitize_html("""<p style="-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss')">fubar</p>""")) |
69 |
| - |
70 |
| - def test_input_image(self): |
71 |
| - self.assertEqual("""<input type="image"/>""", |
72 |
| - self.sanitize_html("""<input type="image" src="javascript:alert('XSS');" />""")) |
73 |
| - |
74 |
| - def test_non_alpha_non_digit(self): |
75 |
| - self.assertEqual(u"<script XSS=\"\" src=\"http://ha.ckers.org/xss.js\"></script>", |
76 |
| - self.sanitize_html("""<script/XSS src="http://ha.ckers.org/xss.js"></script>""")) |
77 |
| - self.assertEqual("<a>foo</a>", |
78 |
| - self.sanitize_html('<a onclick!#$%&()*~+-_.,:;?@[/|\]^`=alert("XSS")>foo</a>')) |
79 |
| - self.assertEqual("<img src=\"http://ha.ckers.org/xss.js\"/>", |
80 |
| - self.sanitize_html('<img/src="http://ha.ckers.org/xss.js"/>')) |
81 |
| - |
82 |
| - def test_img_dynsrc_lowsrc(self): |
83 |
| - self.assertEqual("<img/>", |
84 |
| - self.sanitize_html("""<img dynsrc="javascript:alert('XSS')" />""")) |
85 |
| - self.assertEqual("<img/>", |
86 |
| - self.sanitize_html("""<img lowsrc="javascript:alert('XSS')" />""")) |
87 |
| - |
88 |
| - def test_div_background_image_unicode_encoded(self): |
89 |
| - self.assertEqual('<div style="">foo</div>', |
90 |
| - self.sanitize_html("""<div style="background-image:\0075\0072\006C\0028'\006a\0061\0076\0061\0073\0063\0072\0069\0070\0074\003a\0061\006c\0065\0072\0074\0028.1027\0058.1053\0053\0027\0029'\0029">foo</div>""")) |
91 |
| - |
92 |
| - def test_div_expression(self): |
93 |
| - self.assertEqual(u'<div style="">foo</div>', |
94 |
| - self.sanitize_html("""<div style="width: expression(alert('XSS'));">foo</div>""")) |
95 |
| - |
96 |
| - def test_img_vbscript(self): |
97 |
| - self.assertEqual(u'<img/>', |
98 |
| - self.sanitize_html("""<img src='vbscript:msgbox("XSS")' />""")) |
99 |
| - |
100 | 26 | def test_should_handle_astral_plane_characters(self):
|
101 | 27 | self.assertEqual(u"<p>\U0001d4b5 \U0001d538</p>",
|
102 | 28 | self.sanitize_html("<p>𝒵 𝔸</p>"))
|
103 | 29 |
|
104 |
| - |
105 |
| -for i,img_hack in enumerate( |
106 |
| - ["""<img src="javascript:alert('XSS');" />""", |
107 |
| - """<img src=javascript:alert('XSS') />""", |
108 |
| - """<img src="JaVaScRiPt:alert('XSS')" />""", |
109 |
| - """<img src='javascript:alert("XSS")' />""", |
110 |
| - """<img src='javascript:alert(String.fromCharCode(88,83,83))' />""", |
111 |
| - """<img src='javascript:alert('XSS')' />""", |
112 |
| - """<img src='javascript:alert('XSS')' />""", |
113 |
| - """<img src='javascript:alert('XSS')' />""", |
114 |
| - """<img src="jav\tascript:alert('XSS');" />""", |
115 |
| - """<img src="jav	ascript:alert('XSS');" />""", |
116 |
| - """<img src="jav
ascript:alert('XSS');" />""", |
117 |
| - """<img src="jav
ascript:alert('XSS');" />""", |
118 |
| - """<img src="  javascript:alert('XSS');" />""", |
119 |
| - """<img src=" javascript:alert('XSS');" />""", |
120 |
| - """<img src=" javascript:alert('XSS');" />"""]): |
121 |
| - SanitizeTest.addTest("test_should_not_fall_for_xss_image_hack_#%d"%i, |
122 |
| - "<img/>", img_hack) |
123 |
| - |
124 |
| -for tag, attr in [('img','src'), ('a','href')]: |
125 |
| - close = tag in constants.voidElements and "/>boo" or ">boo</%s>" % tag |
126 |
| - |
127 |
| - SanitizeTest.addTest("test_should_strip_%s_attribute_in_%s_with_bad_protocols" % (attr,tag), |
128 |
| - """<%s title="1"%s""" % (tag, close), |
129 |
| - """<%s %s="javascript:XSS" title="1">boo</%s>""" % (tag,attr,tag)) |
130 |
| - |
131 |
| - SanitizeTest.addTest("test_should_strip_%s_attribute_in_%s_with_bad_protocols_and_whitespace" % (attr,tag), |
132 |
| - """<%s title="1"%s""" % (tag, close), |
133 |
| - """<%s %s=" javascript:XSS" title="1">boo</%s>""" % (tag,attr,tag)) |
134 |
| - |
135 |
| -for img_attr in ['src', 'width', 'height', 'alt']: |
136 |
| - SanitizeTest.addTest("test_should_allow_image_%s_attribute" % img_attr, |
137 |
| - "<img %s=\"foo\"/>" % img_attr, |
138 |
| - "<img %s='foo' onclick='bar' />" % img_attr) |
139 |
| - |
140 | 30 | for tag_name in sanitizer.HTMLSanitizer.allowed_elements:
|
141 | 31 | if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']: continue ### TODO
|
142 | 32 | if tag_name != tag_name.lower(): continue ### TODO
|
@@ -183,6 +73,10 @@ def test_should_handle_astral_plane_characters(self):
|
183 | 73 | """<a href="%s">foo</a>""" % protocol)
|
184 | 74 |
|
185 | 75 | def buildTestSuite():
|
| 76 | + for filename in glob.glob("sanitizer/*.dat"): |
| 77 | + for test in simplejson.load(file(filename)): |
| 78 | + SanitizeTest.addTest('test_' + test['name'], test['output'], test['input']) |
| 79 | + |
186 | 80 | return unittest.TestLoader().loadTestsFromTestCase(SanitizeTest)
|
187 | 81 |
|
188 | 82 | def main():
|
|
0 commit comments