fix: Content at the start of the document · ruby/rexml@1722700 · GitHub
[go: up one dir, main page]

Skip to content

Commit 1722700

Browse files
committed
fix: Content at the start of the document
## Why?

XML with content at the start of the document is invalid.

https://www.w3.org/TR/2006/REC-xml11-20060816/#document

```
[1] document ::= ( prolog element Misc* ) - ( Char* RestrictedChar Char* )
```

https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-prolog

```
[22] prolog ::= XMLDecl Misc* (doctypedecl Misc*)?
```

https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-XMLDecl

```
[23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
```

https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-Misc

```
[27] Misc ::= Comment | PI | S
```

https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PI

```
[16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
```

https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PITarget

```
[17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
```

https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-doctypedecl

```
[28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
```
1 parent 2c39c91 commit 1722700

File tree

4 files changed

+58
-22
lines changed

4 files changed

+58
-22
lines changed

lib/rexml/parsers/baseparser.rb

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -486,11 +486,15 @@ def pull_event
486486
if text.chomp!("<")
487487
@source.position -= "<".bytesize
488488
end
489-
if @tags.empty? and @have_root
489+
if @tags.empty?
490490
unless /\A\s*\z/.match?(text)
491-
raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
491+
if @have_root
492+
raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
493+
else
494+
raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
495+
end
492496
end
493-
return pull_event
497+
return pull_event if @have_root
494498
end
495499
return [ :text, text ]
496500
end

test/parse/test_comment.rb

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,18 @@ def test_after_doctype_malformed_comment_end
110110
end
111111
end
112112

113+
def test_before_root
114+
parser = REXML::Parsers::BaseParser.new('<!-- ok comment --><a></a>')
115+
116+
events = {}
117+
while parser.has_next?
118+
event = parser.pull
119+
events[event[0]] = event[1]
120+
end
121+
122+
assert_equal(" ok comment ", events[:comment])
123+
end
124+
113125
def test_after_root
114126
parser = REXML::Parsers::BaseParser.new('<a></a><!-- ok comment -->')
115127

test/parse/test_processing_instruction.rb

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -25,25 +25,6 @@ def test_no_name
2525
DETAIL
2626
end
2727

28-
def test_garbage_text
29-
# TODO: This should be parse error.
30-
# Create test/parse/test_document.rb or something and move this to it.
31-
doc = parse(<<-XML)
32-
x<?x y
33-
<!--?><?x -->?>
34-
<r/>
35-
XML
36-
pi = doc.children[1]
37-
assert_equal([
38-
"x",
39-
"y\n<!--",
40-
],
41-
[
42-
pi.target,
43-
pi.content,
44-
])
45-
end
46-
4728
def test_xml_declaration_not_at_document_start
4829
exception = assert_raise(REXML::ParseException) do
4930
parser = REXML::Parsers::BaseParser.new('<a><?xml version="1.0" ?></a>')
@@ -62,6 +43,28 @@ def test_xml_declaration_not_at_document_start
6243
end
6344
end
6445

46+
def test_garbage_text
47+
doc = parse(<<~XML)
48+
<?x y
49+
<!--?><?x -->?>
50+
<r/>
51+
XML
52+
assert_equal(["x", "y\n<!--"], [ doc.children[0].target, doc.children[0].content])
53+
assert_equal(["x", "-->"], [ doc.children[1].target, doc.children[1].content])
54+
end
55+
56+
def test_before_root
57+
parser = REXML::Parsers::BaseParser.new('<?abc version="1.0" ?><a></a>')
58+
59+
events = {}
60+
while parser.has_next?
61+
event = parser.pull
62+
events[event[0]] = event[1]
63+
end
64+
65+
assert_equal("abc", events[:processing_instruction])
66+
end
67+
6568
def test_after_root
6669
parser = REXML::Parsers::BaseParser.new('<a></a><?abc version="1.0" ?>')
6770

test/parse/test_text.rb

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,23 @@
44
module REXMLTests
55
class TestParseText < Test::Unit::TestCase
66
class TestInvalid < self
7+
def test_before_root
8+
exception = assert_raise(REXML::ParseException) do
9+
parser = REXML::Parsers::BaseParser.new('b<a></a>')
10+
while parser.has_next?
11+
parser.pull
12+
end
13+
end
14+
15+
assert_equal(<<~DETAIL.chomp, exception.to_s)
16+
Malformed XML: Content at the start of the document (got 'b')
17+
Line: 1
18+
Position: 4
19+
Last 80 unconsumed characters:
20+
<a>
21+
DETAIL
22+
end
23+
724
def test_after_root
825
exception = assert_raise(REXML::ParseException) do
926
parser = REXML::Parsers::BaseParser.new('<a></a>c')

0 commit comments

Comments
 (0)
0