python · Privat33r-dev · Mar 31, 2024 · Mar 31, 2024 · Apr 6, 2024 · Apr 6, 2024
@@ -9,7 +9,7 @@
 
 _declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*').match
 _declstringlit_match = re.compile(r'(\'[^\']*\'|"[^"]*")\s*').match
-_commentclose = re.compile(r'--\s*>')
+_commentclose = re.compile(r'--!?>')
 _markedsectionclose = re.compile(r']\s*]\s*>')
 
 # An analysis of the MS-Word extensions is available at
@@ -81,7 +81,7 @@ def parse_declaration(self, i):
         # A simple, practical version could look like: ((name|stringlit) S*) + '>'
         n = len(rawdata)
         if rawdata[j:j+2] == '--': #comment
-            # Locate --.*-- as the body of the comment
+            # Locate the body of the comment.
             return self.parse_comment(i)
         elif rawdata[j] == '[': #marked section
             # Locate [statusWord [...arbitrary SGML...]] as the body of the marked section
@@ -161,13 +161,19 @@ def parse_marked_section(self, i, report=1):
             self.unknown_decl(rawdata[i+3: j])
         return match.end(0)
 
-    # Internal -- parse comment, return length or -1 if not terminated
-    def parse_comment(self, i, report=1):
+    # Internal -- parse comment
+    # If end is true and no close tag is found, return the EOF location;
+    # otherwise return length, or -1 if not terminated.
+    def parse_comment(self, i, report=1, end=False):
         rawdata = self.rawdata
         if rawdata[i:i+4] != '<!--':
             raise AssertionError('unexpected call to parse_comment()')
-        match = _commentclose.search(rawdata, i+4)
+        match = _commentclose.search(rawdata, i+2)
         if not match:
+            if end:
+                if report:
+                    self.handle_comment(rawdata[i+4:])
+                return len(rawdata)
             return -1
         if report:
             j = match.start(0)

@@ -172,7 +172,7 @@ def goahead(self, end):
                 elif startswith("</", i):
                     k = self.parse_endtag(i)
                 elif startswith("<!--", i):
-                    k = self.parse_comment(i)
+                    k = self.parse_comment(i, end=end)
                 elif startswith("<?", i):
                     k = self.parse_pi(i)
                 elif startswith("<!", i):

@@ -324,14 +324,34 @@ def test_comments(self):
                 '<!---->'
                 '<!----I have many hyphens---->'
                 '<!-- I have a > in the middle -->'
-                '<!-- and I have -- in the middle! -->')
+                '<!-- and I have -- in the middle! -->'
+                '<!--->'
+                '<!-->'
+                '<!--<!--->'
+                '<!--And I am so-called incorrectly-closed-comment--!>'
+                '<!--!>'
+                '<!---!>'
+                '<!--I have invalid attempt to close (space) -- >-->'
+                '<!--Me too (invalid character) --x>-->'
+                '<!--Me too (invalid characters) --cheese>-->'
+                '<!--EOF comment')
         expected = [('comment', " I'm a valid comment "),
                     ('comment', 'me too!'),
                     ('comment', '--'),
                     ('comment', ''),
                     ('comment', '--I have many hyphens--'),
                     ('comment', ' I have a > in the middle '),
-                    ('comment', ' and I have -- in the middle! ')]
+                    ('comment', ' and I have -- in the middle! '),
+                    ('comment', ''),
+                    ('comment', ''),
+                    ('comment', '<!-'),
+                    ('comment', 'And I am so-called incorrectly-closed-comment'),
+                    ('comment', ''),
+                    ('comment', ''),
+                    ('comment', 'I have invalid attempt to close (space) -- >'),
+                    ('comment', 'Me too (invalid character) --x>'),
+                    ('comment', 'Me too (invalid characters) --cheese>'),
+                    ('comment', 'EOF comment')]
         self._run_check(html, expected)
 
     def test_condcoms(self):

diff --git a/Misc/NEWS.d/next/Security/2024-03-31-14-57-20.gh-issue-102555.2P8jGn.rst b/Misc/NEWS.d/next/Security/2024-03-31-14-57-20.gh-issue-102555.2P8jGn.rst
@@ -0,0 +1 @@
+Follow the `parsing recommendation <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-incorrectly-closed-comment>`_ and the `standard <https://html.spec.whatwg.org/#comments>`_ for closing comment tags in :mod:`html.parser`: a comment can now also be closed by ``--!>``, and ``--`` followed by whitespace and ``>`` no longer closes it. Increased compliance leads to more predictable behavior, thus enhancing security.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Follow the `parsing recommendation <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-incorrectly-closed-comment>`_ and the `standard <https://html.spec.whatwg.org/#comments>`_ for closing comment tags in :mod:`html.parser`: a comment can now also be closed by ``--!>``, and ``--`` followed by whitespace and ``>`` no longer closes it. Increased compliance leads to more predictable behavior, thus enhancing security.