[DomCrawler] fix HTML5 parser integration · symfony/symfony@ba83bda · GitHub
[go: up one dir, main page]

Skip to content

Commit ba83bda

Browse files
[DomCrawler] fix HTML5 parser integration
1 parent fba11b4 commit ba83bda

File tree

1 file changed

+8
-15
lines changed
<span role="status" aria-live="polite" aria-atomic="true" class="_VisuallyHidden__VisuallyHidden-sc-11jhm7a-0 brGdpi">

1 file changed

+8
-15
lines changed

src/Symfony/Component/DomCrawler/Crawler.php

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ public function __construct($node = null, string $uri = null, string $baseHref =
6969
{
7070
$this->uri = $uri;
7171
$this->baseHref = $baseHref ?: $uri;
72+
$this->html5Parser = class_exists(HTML5::class) ? new HTML5(['disable_html_ns' => true]) : null;
7273

7374
$this->add($node);
7475
}
@@ -190,13 +191,7 @@ public function addContent($content, $type = null)
190191
public function addHtmlContent($content, $charset = 'UTF-8')
191192
{
192193
// Use HTML5 parser if the content is HTML5 and the library is available
193-
if (!$this->html5Parser
194-
&& class_exists(HTML5::class)
195-
&& '<!doctype html>' === strtolower(substr(ltrim($content), 0, 15))) {
196-
$this->html5Parser = new HTML5(['disable_html_ns' => true]);
197-
}
198-
199-
$dom = null !== $this->html5Parser ? $this->parseHtml5($content, $charset) : $this->parseXhtml($content, $charset);
194+
$dom = null !== $this->html5Parser && strspn($content, " \t\r\n") === stripos($content, '<!doctype html>') ? $this->parseHtml5($content, $charset) : $this->parseXhtml($content, $charset);
200195
$this->addDocument($dom);
201196

202197
$base = $this->filterRelativeXPath('descendant-or-self::base')->extract(['href']);
@@ -599,18 +594,16 @@ public function html(/* $default = null */)
599594
throw new \InvalidArgumentException('The current node list is empty.');
600595
}
601596

602-
if (null !== $this->html5Parser) {
603-
$html = '';
604-
foreach ($this->getNode(0)->childNodes as $child) {
605-
$html .= $this->html5Parser->saveHTML($child);
606-
}
597+
$node = $this->getNode(0);
598+
$owner = $node->ownerDocument;
607599

608-
return $html;
600+
if (null !== $this->html5Parser && '<!DOCTYPE html>' === $owner->saveXML($owner->childNodes[0])) {
601+
$owner = $this->html5Parser;
609602
}
610603

611604
$html = '';
612-
foreach ($this->getNode(0)->childNodes as $child) {
613-
$html .= $child->ownerDocument->saveHTML($child);
605+
foreach ($node->childNodes as $child) {
606+
$html .= $owner->saveHTML($child);
614607
}
615608

616609
return $html;

0 commit comments

Comments
 (0)
0