fix: Extra content at the end of the document (#161) · ruby/rexml@eb45c8d · GitHub
[go: up one dir, main page]

Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit eb45c8d

Browse files
authored
fix: Extra content at the end of the document (#161)
## Why?

XML with additional content at the end of the document is invalid.

https://www.w3.org/TR/2006/REC-xml11-20060816/#document

```
[1] document ::= ( prolog element Misc* ) - ( Char* RestrictedChar Char* )
```

https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-Misc

```
[27] Misc ::= Comment | PI | S
```

https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PI

```
[16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
```

https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PITarget

```
[17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
```
1 parent face9dd commit eb45c8d

File tree

6 files changed

+99
-7
lines changed

6 files changed

+99
-7
lines changed

lib/rexml/parsers/baseparser.rb

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -460,15 +460,24 @@ def pull_event
460460
@closed = tag
461461
@nsstack.shift
462462
else
463+
if @tags.empty? and @have_root
464+
raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
465+
end
463466
@tags.push( tag )
464467
end
468+
@have_root = true
465469
return [ :start_element, tag, attributes ]
466470
end
467471
else
468472
text = @source.read_until("<")
469473
if text.chomp!("<")
470474
@source.position -= "<".bytesize
471475
end
476+
if @tags.empty? and @have_root
477+
unless /\A\s*\z/.match?(text)
478+
raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
479+
end
480+
end
472481
return [ :text, text ]
473482
end
474483
rescue REXML::UndefinedNamespaceException

test/parse/test_comment.rb

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,5 +105,17 @@ def test_after_doctype_malformed_comment_end
105105
DETAIL
106106
end
107107
end
108+
109+
def test_after_root
110+
parser = REXML::Parsers::BaseParser.new('<a></a><!-- ok comment -->')
111+
112+
events = {}
113+
while parser.has_next?
114+
event = parser.pull
115+
events[event[0]] = event[1]
116+
end
117+
118+
assert_equal(" ok comment ", events[:comment])
119+
end
108120
end
109121
end

test/parse/test_element.rb

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,40 @@ def test_garbage_less_than_slash_before_end_tag_at_line_start
8585
</ </x>
8686
DETAIL
8787
end
88+
89+
def test_after_root
90+
exception = assert_raise(REXML::ParseException) do
91+
parser = REXML::Parsers::BaseParser.new('<a></a><b>')
92+
while parser.has_next?
93+
parser.pull
94+
end
95+
end
96+
97+
assert_equal(<<~DETAIL.chomp, exception.to_s)
98+
Malformed XML: Extra tag at the end of the document (got '<b')
99+
Line: 1
100+
Position: 10
101+
Last 80 unconsumed characters:
102+
103+
DETAIL
104+
end
105+
106+
def test_after_empty_element_tag_root
107+
exception = assert_raise(REXML::ParseException) do
108+
parser = REXML::Parsers::BaseParser.new('<a/><b>')
109+
while parser.has_next?
110+
parser.pull
111+
end
112+
end
113+
114+
assert_equal(<<~DETAIL.chomp, exception.to_s)
115+
Malformed XML: Extra tag at the end of the document (got '<b')
116+
Line: 1
117+
Position: 7
118+
Last 80 unconsumed characters:
119+
120+
DETAIL
121+
end
88122
end
89123
end
90124
end

test/parse/test_processing_instruction.rb

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,5 +40,17 @@ def test_garbage_text
4040
])
4141
end
4242
end
43+
44+
def test_after_root
45+
parser = REXML::Parsers::BaseParser.new('<a></a><?abc version="1.0" ?>')
46+
47+
events = {}
48+
while parser.has_next?
49+
event = parser.pull
50+
events[event[0]] = event[1]
51+
end
52+
53+
assert_equal("abc", events[:processing_instruction])
54+
end
4355
end
4456
end

test/parse/test_text.rb

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
require "test/unit"
2+
require 'rexml/parsers/baseparser'
3+
4+
module REXMLTests
5+
class TestParseText < Test::Unit::TestCase
6+
class TestInvalid < self
7+
def test_after_root
8+
exception = assert_raise(REXML::ParseException) do
9+
parser = REXML::Parsers::BaseParser.new('<a></a>c')
10+
while parser.has_next?
11+
parser.pull
12+
end
13+
end
14+
15+
assert_equal(<<~DETAIL.chomp, exception.to_s)
16+
Malformed XML: Extra content at the end of the document (got 'c')
17+
Line: 1
18+
Position: 8
19+
Last 80 unconsumed characters:
20+
21+
DETAIL
22+
end
23+
end
24+
end
25+
end

test/test_pullparser.rb

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -63,23 +63,23 @@ def test_entity_replacement
6363
end
6464

6565
def test_character_references
66-
source = '<a>&#65;</a><b>&#x42;</b>'
66+
source = '<root><a>&#65;</a><b>&#x42;</b></root>'
6767
parser = REXML::Parsers::PullParser.new( source )
68+
69+
events = {}
6870
element_name = ''
6971
while parser.has_next?
7072
event = parser.pull
7173
case event.event_type
7274
when :start_element
7375
element_name = event[0]
7476
when :text
75-
case element_name
76-
when 'a'
77-
assert_equal('A', event[1])
78-
when 'b'
79-
assert_equal('B', event[1])
80-
end
77+
events[element_name] = event[1]
8178
end
8279
end
80+
81+
assert_equal('A', events['a'])
82+
assert_equal("B", events['b'])
8383
end
8484

8585
def test_text_content_with_line_breaks

0 commit comments

Comments
 (0)
0